Merged new importer UI work from branch over.

git-svn-id: http://google-refine.googlecode.com/svn/trunk@2170 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2011-08-02 03:34:47 +00:00
parent 0fa99d21ca
commit 78edff6f7f
122 changed files with 9647 additions and 4469 deletions

View File

@ -33,8 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
var html = "text/html";
var encoding = "UTF-8";
var version="0.2"
var ClientSideResourceManager = Packages.com.google.refine.ClientSideResourceManager;
var version = "0.2";
/*
* Function invoked to initialize the extension.
@ -43,21 +42,24 @@ function init() {
// Packages.java.lang.System.err.println("Initializing gData extension");
// Packages.java.lang.System.err.println(module.getMountPoint());
Packages.com.google.refine.RefineServlet.registerCommand(
module, "authorize", Packages.com.google.refine.extension.gdata.AuthorizeCommand());
Packages.com.google.refine.RefineServlet.registerCommand(
module, "authorize2", Packages.com.google.refine.extension.gdata.AuthorizeCommand2());
Packages.com.google.refine.RefineServlet.registerCommand(
module, "deauthorize", Packages.com.google.refine.extension.gdata.DeAuthorizeCommand());
var RS = Packages.com.google.refine.RefineServlet;
RS.registerCommand(module, "authorize", Packages.com.google.refine.extension.gdata.AuthorizeCommand());
RS.registerCommand(module, "authorize2", Packages.com.google.refine.extension.gdata.AuthorizeCommand2());
RS.registerCommand(module, "deauthorize", Packages.com.google.refine.extension.gdata.DeAuthorizeCommand());
// Register importer and exporter
Packages.com.google.refine.importers.ImporterRegistry.registerImporter(
"gdata-importer", new Packages.com.google.refine.extension.gdata.GDataImporter());
var IM = Packages.com.google.refine.importing.ImportingManager;
IM.registerFormat("service/gdata", "GData services"); // generic format, no parser to handle it
IM.registerFormat("service/gdata/spreadsheet", "Google spreadsheets", false, "GoogleSpreadsheetParserUI",
new Packages.com.google.refine.extension.gdata.GDataImporter());
IM.registerUrlRewriter(new Packages.com.google.refine.extension.gdata.GDataUrlRewriter())
IM.registerUrlRewriter(new Packages.com.google.refine.extension.gdata.FusionTablesUrlRewriter())
// Packages.com.google.refine.exporters.ExporterRegistry.registerExporter(
// "gdata-exporter", new Packages.com.google.refine.extension.gdata.GDataExporter());
// Script files to inject into /project page
var ClientSideResourceManager = Packages.com.google.refine.ClientSideResourceManager;
ClientSideResourceManager.addPaths(
"project/scripts",
module,

View File

@ -0,0 +1,128 @@
/*
* Copyright (c) 2010, Thomas F. Morris
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of Google nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.extension.gdata;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import com.google.gdata.client.GoogleService;
import com.google.gdata.client.Service.GDataRequest;
import com.google.gdata.client.Service.GDataRequest.RequestType;
import com.google.gdata.util.ContentType;
import com.google.gdata.util.ServiceException;
import com.google.refine.importing.UrlRewriter;
/**
 * URL rewriter for Google Fusion Tables. It recognizes "DataSource" page URLs
 * (e.g. http://www.google.com/fusiontables/DataSource?dsrcid=1219) and rewrites
 * them into a query against the Fusion Tables query API, declaring the result
 * as downloadable content in the "text/line-based/*sv" format.
 *
 * @author Tom Morris <tfmorris@gmail.com>
 * @copyright 2010 Thomas F. Morris
 * @license New BSD http://www.opensource.org/licenses/bsd-license.php
 */
public class FusionTablesUrlRewriter implements UrlRewriter {
    /** Query-string parameter (including the '=') that carries the table id. */
    static final private String TABLE_ID_PARAM = "dsrcid=";

    /** Endpoint of the Fusion Tables query API. */
    static final private String SERVICE_URL = "http://www.google.com/fusiontables/api/query";

    /**
     * Rewrites a Fusion Tables page URL into a query URL selecting the whole table.
     *
     * @param urlString the URL entered by the user
     * @return the rewrite result, or null when the string is not a parseable URL,
     *         is not a Fusion Tables URL, or no query URL could be generated
     */
    @Override
    public Result rewrite(String urlString) {
        try {
            URL url = new URL(urlString);
            if (isFusionTableURL(url)) {
                Result result = new Result();
                result.rewrittenUrl = generateQueryUrl(url, 0, -1).toExternalForm();
                result.format = "text/line-based/*sv";
                result.download = true;
                return result;
            }
        } catch (MalformedURLException e) {
            // Not a URL this rewriter can handle; fall through and decline.
        } catch (UnsupportedEncodingException e) {
            // Raised only if the "UTF-8" encoding is unavailable; without it no
            // query URL can be built, so decline to rewrite.
        }
        return null;
    }

    /**
     * Tells whether the URL points at a Fusion Tables data source page.
     * The host must end with ".google.com", the path must start with
     * "/fusiontables/DataSource" and a non-empty "dsrcid" parameter must be
     * present, so that every URL accepted here can also be turned into a query
     * by {@link #generateQueryUrl(URL, int, int)}.
     *
     * @param url the URL to test
     * @return true if the URL identifies a Fusion Table
     */
    static public boolean isFusionTableURL(URL url) {
        // http://www.google.com/fusiontables/DataSource?dsrcid=1219
        return url.getHost().endsWith(".google.com")
            && url.getPath().startsWith("/fusiontables/DataSource")
            && getFusionTableKey(url) != null;
    }

    /**
     * Builds the query API URL that selects rows of the table referenced by
     * the given page URL.
     *
     * @param url   a Fusion Tables page URL carrying a "dsrcid" parameter
     * @param start number of rows to skip; negative values are treated as 0
     * @param limit maximum number of rows; values &lt;= 0 mean "no limit"
     * @return the URL of the query request
     * @throws MalformedURLException if the page URL carries no table id or the
     *         resulting query URL is malformed
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    static public URL generateQueryUrl(URL url, int start, int limit)
            throws MalformedURLException, UnsupportedEncodingException {

        String tableId = getFusionTableKey(url);
        if (tableId == null) {
            // Previously this produced the query "select * from null".
            throw new MalformedURLException("No Fusion Table id (dsrcid) found in URL: " + url);
        }

        // NOTE(review): tableId comes straight from the user-supplied URL and is
        // concatenated into the query text; consider validating its format.
        final String selectQuery = "select * from " + tableId
            + " offset " + Math.max(0, start)
            + (limit > 0 ? (" limit " + limit) : "");

        return new URL(SERVICE_URL + "?sql=" + URLEncoder.encode(selectQuery, "UTF-8"));
    }

    /**
     * Executes the given query URL against the Fusion Tables service and
     * returns the response body as a stream.
     *
     * @param queryUrl a URL such as the one built by {@link #generateQueryUrl(URL, int, int)}
     * @return the response stream of the executed request
     * @throws IOException      on communication failure
     * @throws ServiceException if the service reports an error
     */
    static public InputStream openInputStream(URL queryUrl) throws IOException, ServiceException {
        GoogleService service = new GoogleService("fusiontables", GDataExtension.SERVICE_APP_NAME);
        // TODO: authenticated access is not wired in yet:
        // String token = TokenCookie.getToken(request);
        // if (token != null) {
        //     service.setAuthSubToken(token);
        // }
        GDataRequest queryRequest = service.getRequestFactory().getRequest(
            RequestType.QUERY, queryUrl, ContentType.TEXT_PLAIN);
        queryRequest.execute();
        return queryRequest.getResponseStream();
    }

    /**
     * Extracts the table id from the "dsrcid" query parameter.
     *
     * @param url the page URL
     * @return the table id, or null if the parameter is missing or empty
     */
    static private String getFusionTableKey(URL url) {
        String query = url.getQuery();
        if (query != null) {
            String[] parts = query.split("&");
            for (String part : parts) {
                if (part.startsWith(TABLE_ID_PARAM)) {
                    String tableId = part.substring(TABLE_ID_PARAM.length());
                    // TODO: Any special id format considerations to worry about?
                    if (tableId.length() > 0) {
                        return tableId;
                    }
                }
            }
        }
        return null;
    }
}

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2010, Thomas F. Morris
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of Google nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.extension.gdata;
import com.google.gdata.client.spreadsheet.FeedURLFactory;
/**
 * Holder for values shared by the classes of the GData extension: the
 * application name passed to Google services and a lazily obtained
 * {@link FeedURLFactory}.
 *
 * @author Tom Morris <tfmorris@gmail.com>
 * @copyright 2010 Thomas F. Morris
 * @license New BSD http://www.opensource.org/licenses/bsd-license.php
 */
abstract public class GDataExtension {
    /** Application name handed to the Google service clients. */
    static final String SERVICE_APP_NAME = "Google-Refine-GData-Extension";

    /** Cached factory; stays null until the first call to {@link #getFeedUrlFactory()}. */
    static private FeedURLFactory factory;

    /**
     * Returns the feed URL factory, fetching the default one on first use.
     * Careful: the instance lives in a static field and is therefore shared
     * by every caller; no synchronization is performed here.
     *
     * @return the shared feed URL factory
     */
    static public FeedURLFactory getFeedUrlFactory() {
        if (factory != null) {
            return factory;
        }
        factory = FeedURLFactory.getDefault();
        return factory;
    }
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010,2011. Thomas F. Morris
* Copyright (c) 2010, Thomas F. Morris
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -29,281 +29,125 @@
package com.google.refine.extension.gdata;
import java.io.IOException;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import com.google.gdata.client.GoogleService;
import com.google.gdata.client.Service.GDataRequest;
import com.google.gdata.client.Service.GDataRequest.RequestType;
import org.json.JSONObject;
import com.google.gdata.client.spreadsheet.CellQuery;
import com.google.gdata.client.spreadsheet.FeedURLFactory;
import com.google.gdata.client.spreadsheet.SpreadsheetService;
import com.google.gdata.data.spreadsheet.Cell;
import com.google.gdata.data.spreadsheet.CellEntry;
import com.google.gdata.data.spreadsheet.CellFeed;
import com.google.gdata.data.spreadsheet.ListEntry;
import com.google.gdata.data.spreadsheet.ListFeed;
import com.google.gdata.data.spreadsheet.SpreadsheetEntry;
import com.google.gdata.data.spreadsheet.SpreadsheetFeed;
import com.google.gdata.data.spreadsheet.WorksheetEntry;
import com.google.gdata.data.spreadsheet.WorksheetFeed;
import com.google.gdata.util.ContentType;
import com.google.gdata.util.InvalidEntryException;
import com.google.gdata.util.ServiceException;
import com.google.refine.ProjectMetadata;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.importers.ImporterUtilities;
import com.google.refine.importers.UrlImporter;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.importers.TabularImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities;
/**
* Google Refine importer for Google Spreadsheets.
* Google Refine parser for Google Spreadsheets.
*
* @author Tom Morris <tfmorris@gmail.com>
* @copyright 2010 Thomas F. Morris
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
*/
public class GDataImporter implements UrlImporter {
static final String SERVICE_APP_NAME = "Google-Refine-GData-Extension";
private FeedURLFactory factory;
public class GDataImporter extends TabularImportingParserBase {
public GDataImporter() {
// Careful - this constructor is called at server init time
// and is shared by everyone.
factory = FeedURLFactory.getDefault();
super(false);
}
@Override
public void read(URL url, Project project, ProjectMetadata metadata,
Properties options) throws Exception {
public void parseOneFile(
Project project,
ProjectMetadata metadata,
ImportingJob job,
JSONObject fileRecord,
int limit,
JSONObject options,
List<Exception> exceptions
) throws IOException {
String fileSource = ImportingUtilities.getFileSource(fileRecord);
String urlString = JSONUtilities.getString(fileRecord, "url", null);
URL url = new URL(urlString);
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
int limit = ImporterUtilities.getIntegerOption("limit", options, -1);
// Note: Unlike TSV/CSV importer, we count all rows towards skip, not
// just "data" rows
int skip = ImporterUtilities.getIntegerOption("skip", options, 0);
int dataStart = ignoreLines + headerLines + skip;
boolean guessValueType = ImporterUtilities.getBooleanOption(
"guess-value-type", options, true);
// TODO: Put this in a namespace?
metadata.setCustomMetadata("source-url", url.toExternalForm());
// Start fresh for each read so that we're not caching authorization or
// anything
if (isSpreadsheetURL(url)) {
importSpreadsheet(url, project, ignoreLines, headerLines, limit,
dataStart, guessValueType);
} else if (isFusionTableURL(url)) {
importFusionTable(url, project, ignoreLines, headerLines, limit,
dataStart, guessValueType);
} else {
// should never happen (famous last words)
throw new IllegalArgumentException(
"Got invalid format URL in GDataImporter.read()");
}
}
private void importSpreadsheet(URL url, Project project, int ignoreLines,
int headerLines, int limit, int dataStart, boolean guessValueType)
throws MalformedURLException, IOException, ServiceException,
Exception {
SpreadsheetService service = new SpreadsheetService(SERVICE_APP_NAME);
SpreadsheetService service = new SpreadsheetService(GDataExtension.SERVICE_APP_NAME);
// String token = TokenCookie.getToken(request);
// if (token != null) {
// service.setAuthSubToken(token);
// }
String spreadsheetKey = getSpreadsheetKey(url);
int[] sheets = JSONUtilities.getIntArray(options, "sheets");
for (int sheetIndex : sheets) {
WorksheetEntry worksheet;
try {
worksheet = getWorksheetEntries(service, spreadsheetKey).get(0);
} catch (InvalidEntryException e) {
throw new RuntimeException("Failed to open spreadsheet "
+ e.getResponseBody(), e);
}
// Create columns
List<String> columnHeaders = getColumnHeaders(service, worksheet,
ignoreLines, headerLines);
int columnCount = worksheet.getColCount();
project.columnModel.setMaxCellIndex(columnCount);
boolean validColumn[] = new boolean[columnCount];
int index = 0;
for (String name : columnHeaders) {
Column column = new Column(index, name + " " + index);
project.columnModel.columns.add(column);
validColumn[index++] = true;
}
for (int i = index; index < columnCount; index++) {
Column column = new Column(index, "Column " + index);
project.columnModel.columns.add(column);
validColumn[i] = true;
}
// Create data rows & cells
int previousRow = dataStart - 1;
int previousCol = -1;
List<CellEntry> cellEntries = getCells(service, worksheet, dataStart);
Row row = null;
for (CellEntry cellEntry : cellEntries) {
com.google.gdata.data.spreadsheet.Cell cell = cellEntry.getCell();
if (cell == null) {
worksheet = getWorksheetEntries(service, spreadsheetKey).get(sheetIndex);
} catch (ServiceException e) {
exceptions.add(e);
continue;
}
int r = cell.getRow() - 1; // convert from 1-based to 0-based
int c = cell.getCol() - 1;
if (limit > 0 && r > limit) {
break;
readTable(
project,
metadata,
job,
new BatchRowReader(service, worksheet, 20),
fileSource + "#" + worksheet.getTitle().getPlainText(),
limit,
options,
exceptions
);
}
}
// Handle gaps in rows
if (r > previousRow) {
// Finish and add current row
if (row != null) {
project.rows.add(row);
// project.columnModel.setMaxCellIndex(row.cells.size()); //
// TODO: ???
static private class BatchRowReader implements TableDataReader {
final int batchSize;
final SpreadsheetService service;
final WorksheetEntry worksheet;
final int totalRowCount;
int nextRow = 0; // 0-based
int batchRowStart = -1; // 0-based
List<List<Object>> rowsOfCells = null;
public BatchRowReader(SpreadsheetService service, WorksheetEntry worksheet, int batchSize) {
this.service = service;
this.worksheet = worksheet;
this.batchSize = batchSize;
this.totalRowCount = worksheet.getRowCount();
}
// Add empty rows for skipped rows
while (previousRow < r - 1) {
project.rows.add(new Row(columnCount));
previousRow++;
@Override
public List<Object> getNextRowOfCells() throws IOException {
if (rowsOfCells == null || nextRow > batchRowStart + rowsOfCells.size()) {
batchRowStart = batchRowStart + (rowsOfCells == null ? 0 : rowsOfCells.size());
if (batchRowStart < totalRowCount) {
try {
rowsOfCells = getRowsOfCells(service, worksheet, batchRowStart + 1, batchSize);
} catch (ServiceException e) {
rowsOfCells = null;
throw new IOException(e);
}
row = new Row(columnCount);
previousRow = r;
previousCol = 0;
}
// Add blank cells for any that were skipped before the current one
for (int col = previousCol + 1; col < c; col++) {
row.cells.add(new Cell("", null));
}
previousCol = c;
String s = cell.getValue();
if (s != null) {
s = s.trim();
}
if (ExpressionUtils.isNonBlankData(s)) {
Serializable value = guessValueType ? ImporterUtilities
.parseCellValue(s) : s;
row.cells.add(new Cell(value, null));
} else {
row.cells.add(null);
}
}
// Add last row
if (row != null) {
project.rows.add(row);
rowsOfCells = null;
}
}
private void importFusionTable(URL url, Project project, int ignoreLines,
int headerLines, int limit, int dataStart, boolean guessValueType)
throws MalformedURLException, IOException, ServiceException,
Exception {
GoogleService service = new GoogleService("fusiontables", SERVICE_APP_NAME);
// String token = TokenCookie.getToken(request);
// if (token != null) {
// service.setAuthSubToken(token);
// }
String tableId = getFusionTableKey(url);
final String SERVICE_URL =
"http://www.google.com/fusiontables/api/query";
final String selectQuery = "select * from " + tableId
+ " offset " + (dataStart) + (limit>0 ? (" limit " + limit):"");
URL queryUrl = new URL(
SERVICE_URL + "?sql=" + URLEncoder.encode(selectQuery, "UTF-8"));
GDataRequest queryRequest = service.getRequestFactory().getRequest(
RequestType.QUERY, queryUrl, ContentType.TEXT_PLAIN);
queryRequest.execute();
Scanner scanner = new Scanner(queryRequest.getResponseStream(),"UTF-8");
// TODO: Just use the first row of data as column headers for now
List<String> columnHeaders = getTableRow(scanner);
// Create columns
int columnCount = columnHeaders.size();
project.columnModel.setMaxCellIndex(columnCount);
boolean validColumn[] = new boolean[columnCount];
int index = 0;
for (String name : columnHeaders) {
Column column = new Column(index, name + " " + index);
project.columnModel.columns.add(column);
validColumn[index++] = true;
}
for (int i = index; index < columnCount; index++) {
Column column = new Column(index, "Column " + index);
project.columnModel.columns.add(column);
validColumn[i] = true;
}
// Create data rows & cells
List<String> values = columnHeaders;
while (values != null) {
Row row = new Row(columnCount);
for (String valString : values) {
valString = valString.trim();
if (ExpressionUtils.isNonBlankData(valString)) {
Serializable value = guessValueType ? ImporterUtilities
.parseCellValue(valString) : valString;
row.cells.add(new Cell(value, null));
if (rowsOfCells != null && nextRow - batchRowStart < rowsOfCells.size()) {
return rowsOfCells.get(nextRow++ - batchRowStart);
} else {
row.cells.add(null);
}
}
project.rows.add(row);
values = getTableRow(scanner);
}
}
private List<String> getTableRow(Scanner scanner) {
/**
* CSV values are terminated by comma or end-of-line and consist either of
* plain text without commas or quotes, or a quoted expression, where inner
* quotes are escaped by doubling.
*/
final Pattern CSV_VALUE_PATTERN =
Pattern.compile("([^,\\r\\n\"]*|\"(([^\"]*\"\")*[^\"]*)\")(,|\\r?\\n)");
if (!scanner.hasNextLine()) {
return null;
}
List<String> result = new ArrayList<String>();
while (scanner.hasNextLine()) {
scanner.findWithinHorizon(CSV_VALUE_PATTERN, 0);
MatchResult match = scanner.match();
String quotedString = match.group(2);
String decoded = quotedString == null ? match.group(1)
: quotedString.replaceAll("\"\"", "\"");
result.add(decoded);
if (!match.group(4).equals(",")) {
break;
}
}
return result;
}
/**
* Retrieves the spreadsheets that an authenticated user has access to. Not
@ -313,130 +157,67 @@ public class GDataImporter implements UrlImporter {
* @throws Exception
* if error in retrieving the spreadsheet information
*/
public List<SpreadsheetEntry> getSpreadsheetEntries(
SpreadsheetService service) throws Exception {
static public List<SpreadsheetEntry> getSpreadsheetEntries(
SpreadsheetService service
) throws Exception {
SpreadsheetFeed feed = service.getFeed(
factory.getSpreadsheetsFeedUrl(), SpreadsheetFeed.class);
GDataExtension.getFeedUrlFactory().getSpreadsheetsFeedUrl(),
SpreadsheetFeed.class);
return feed.getEntries();
}
public List<WorksheetEntry> getWorksheetEntries(SpreadsheetService service,
String spreadsheetKey) throws MalformedURLException, IOException,
ServiceException {
WorksheetFeed feed = service
.getFeed(factory.getWorksheetFeedUrl(spreadsheetKey, "public",
"values"), WorksheetFeed.class);
static public List<WorksheetEntry> getWorksheetEntries(
SpreadsheetService service, String spreadsheetKey
) throws MalformedURLException, IOException, ServiceException {
WorksheetFeed feed = service.getFeed(
GDataExtension.getFeedUrlFactory().getWorksheetFeedUrl(spreadsheetKey, "public", "values"),
WorksheetFeed.class);
return feed.getEntries();
}
/**
* Retrieves the columns headers from the cell feed of the worksheet entry.
*
* @param worksheet
* worksheet entry containing the cell feed in question
* @return a list of column headers
* @throws Exception
* if error in retrieving the spreadsheet information
*/
public List<String> getColumnHeaders(SpreadsheetService service,
WorksheetEntry worksheet, int startRow, int rows) throws Exception {
List<String> headers = new ArrayList<String>();
// Get the appropriate URL for a cell feed
static public List<List<Object>> getRowsOfCells(
SpreadsheetService service,
WorksheetEntry worksheet,
int startRow, // 1-based
int rowCount
) throws IOException, ServiceException {
URL cellFeedUrl = worksheet.getCellFeedUrl();
// Create a query for the cells in the header row(s) (1-based)
CellQuery cellQuery = new CellQuery(cellFeedUrl);
if (startRow > 0) {
cellQuery.setMinimumRow(startRow + 1);
}
cellQuery.setMaximumRow(startRow + rows);
// Get the cell feed matching the query
CellFeed topRowCellFeed = service.query(cellQuery, CellFeed.class);
// Get the cell entries from the feed
List<CellEntry> cellEntries = topRowCellFeed.getEntries();
for (CellEntry entry : cellEntries) {
// Get the cell element from the entry
com.google.gdata.data.spreadsheet.Cell cell = entry.getCell();
int r = cell.getRow() - 1;
if (cell != null) {
if (r == startRow) {
headers.add(cell.getValue().trim());
} else if (r < startRow + rows) {
headers.set(r, headers.get(r) + " "
+ cell.getValue().trim());
}
}
}
return headers;
}
public List<CellEntry> getCells(SpreadsheetService service,
WorksheetEntry worksheet, int startRow) throws IOException,
ServiceException {
URL cellFeedUrl = worksheet.getCellFeedUrl();
// Create a query skipping the desired number of rows
CellQuery cellQuery = new CellQuery(cellFeedUrl);
cellQuery.setMinimumRow(startRow + 1); // 1-based
int rows = worksheet.getRowCount();
cellQuery.setMaximumRow(rows);
// cellQuery.setMinimumCol(1);
int minRow = Math.max(1, startRow);
int maxRow = Math.min(worksheet.getRowCount(), startRow + rowCount - 1);
int rows = maxRow - minRow + 1;
int cols = worksheet.getColCount();
CellQuery cellQuery = new CellQuery(cellFeedUrl);
cellQuery.setMinimumRow(minRow);
cellQuery.setMaximumRow(maxRow);
cellQuery.setMaximumCol(cols);
cellQuery.setMaxResults(rows * cols);
cellQuery.setReturnEmpty(false);
CellFeed cellFeed = service.query(cellQuery, CellFeed.class);
return cellFeed.getEntries();
List<CellEntry> cellEntries = cellFeed.getEntries();
List<List<Object>> rowsOfCells = new ArrayList<List<Object>>(rows);
for (CellEntry cellEntry : cellEntries) {
Cell cell = cellEntry.getCell();
int row = cell.getRow();
int col = cell.getCol();
while (row > rowsOfCells.size()) {
rowsOfCells.add(new ArrayList<Object>(cols));
}
List<Object> rowOfCells = rowsOfCells.get(row - 1); // 1-based
while (col > rowOfCells.size()) {
rowOfCells.add(null);
}
rowOfCells.set(col - 1, cell.getValue());
}
return rowsOfCells;
}
List<ListEntry> getListEntries(SpreadsheetService service,
WorksheetEntry worksheet) throws IOException, ServiceException {
URL listFeedUrl = worksheet.getListFeedUrl();
ListFeed feed = service.getFeed(listFeedUrl, ListFeed.class);
return feed.getEntries();
}
@Override
public boolean canImportData(String contentType, String filename) {
return false;
}
@Override
public boolean canImportData(URL url) {
return isSpreadsheetURL(url) || isFusionTableURL(url);
}
private boolean isSpreadsheetURL(URL url) {
String host = url.getHost();
String query = url.getQuery();
if (query == null) {
query = "";
}
// http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
return host.endsWith(".google.com")
&& host.contains("spreadsheet")
&& getSpreadsheetKey(url) != null;
}
private boolean isFusionTableURL(URL url) {
// http://www.google.com/fusiontables/DataSource?dsrcid=1219
String query = url.getQuery();
if (query == null) {
query = "";
}
return url.getHost().endsWith(".google.com")
&& url.getPath().startsWith("/fusiontables/DataSource")
&& getFusionTableKey(url) != null;
}
// Modified version of FeedURLFactor.getSpreadsheetKeyFromUrl()
// Modified version of FeedURLFactory.getSpreadsheetKeyFromUrl()
private String getSpreadsheetKey(URL url) {
String query = url.getQuery();
if (query != null) {
@ -472,23 +253,4 @@ public class GDataImporter implements UrlImporter {
}
return null;
}
private String getFusionTableKey(URL url) {
String query = url.getQuery();
if (query != null) {
String[] parts = query.split("&");
for (String part : parts) {
if (part.startsWith("dsrcid=")) {
int offset = ("dsrcid=").length();
String tableId = part.substring(offset);
// TODO: Any special id format considerations to worry about?
// if (tableId.startsWith("p") || !tableId.contains(".")) {
// return tableId;
// }
return tableId;
}
}
}
return null;
}
}

View File

@ -0,0 +1,69 @@
/*
* Copyright (c) 2010, Thomas F. Morris
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of Google nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.extension.gdata;
import java.net.MalformedURLException;
import java.net.URL;
import com.google.refine.importing.UrlRewriter;
/**
 * URL rewriter that tags Google Spreadsheets URLs with the
 * "service/gdata/spreadsheet" format. The URL itself is passed through
 * unchanged and is marked as not to be downloaded.
 *
 * @author Tom Morris <tfmorris@gmail.com>
 * @copyright 2010 Thomas F. Morris
 * @license New BSD http://www.opensource.org/licenses/bsd-license.php
 */
public class GDataUrlRewriter implements UrlRewriter {

    /**
     * Classifies the given URL string.
     *
     * @param urlString the URL entered by the user
     * @return a result describing the spreadsheet URL, or null if the string
     *         is not a parseable URL or is not a spreadsheet URL
     */
    @Override
    public Result rewrite(String urlString) {
        URL parsed;
        try {
            parsed = new URL(urlString);
        } catch (MalformedURLException e) {
            // Not parseable as a URL: nothing for this rewriter to do.
            return null;
        }

        if (!isSpreadsheetURL(parsed)) {
            return null;
        }

        Result outcome = new Result();
        outcome.rewrittenUrl = urlString;
        outcome.format = "service/gdata/spreadsheet";
        outcome.download = false;
        return outcome;
    }

    /**
     * Tells whether the URL looks like a Google Spreadsheets URL, e.g.
     * http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
     *
     * @param url the URL to test
     * @return true if the host ends with ".google.com" and contains
     *         "spreadsheet", and the query string contains "key="
     */
    static public boolean isSpreadsheetURL(URL url) {
        String hostName = url.getHost();
        if (!hostName.endsWith(".google.com") || !hostName.contains("spreadsheet")) {
            return false;
        }

        String queryString = url.getQuery();
        return queryString != null && queryString.contains("key=");
    }
}

View File

@ -0,0 +1,19 @@
package com.google.refine;
import java.io.IOException;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import com.google.refine.RefineServlet;
/**
 * Contract for an object that can answer HTTP GET and POST requests on
 * behalf of a {@link RefineServlet}.
 */
public interface HttpResponder {

    /**
     * Hands the responder the servlet it belongs to.
     * NOTE(review): presumably called once before any request is dispatched;
     * confirm against the caller.
     *
     * @param servlet the owning servlet
     */
    void init(RefineServlet servlet);

    /**
     * Handles an HTTP POST request.
     *
     * @param request  the incoming request
     * @param response the response to fill in
     * @throws ServletException if the request cannot be handled
     * @throws IOException      on I/O failure while reading or responding
     */
    void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException;

    /**
     * Handles an HTTP GET request.
     *
     * @param request  the incoming request
     * @param response the response to fill in
     * @throws ServletException if the request cannot be handled
     * @throws IOException      on I/O failure while reading or responding
     */
    void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException;
}

View File

@ -50,7 +50,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.commands.Command;
import com.google.refine.commands.importing.ImportManager;
import com.google.refine.importing.ImportingManager;
import com.google.refine.io.FileProjectManager;
import edu.mit.simile.butterfly.Butterfly;
@ -125,7 +125,7 @@ public class RefineServlet extends Butterfly {
s_dataDir = new File(data);
FileProjectManager.initialize(s_dataDir);
ImportManager.initialize(this);
ImportingManager.initialize(this);
if (_timer == null) {
_timer = new Timer("autosave");

View File

@ -0,0 +1,180 @@
package com.google.refine.commands;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Properties;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.velocity.VelocityContext;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.Jsonizable;
import com.google.refine.RefineServlet;
import com.google.refine.util.ParsingUtilities;
abstract public class HttpUtilities {
// Logger shared by the static helpers of this class; registered under the name "command".
final static protected Logger logger = LoggerFactory.getLogger("command");
/**
 * Sends the given text as the body of a successful (200) response, using
 * UTF-8 as the character encoding, then flushes and closes the writer.
 *
 * @param response the response to write to
 * @param content  the text to send
 * @throws IOException      if writing to the response fails
 * @throws ServletException if the response provides no writer
 */
static public void respond(HttpServletResponse response, String content)
        throws IOException, ServletException {

    response.setCharacterEncoding("UTF-8");
    response.setStatus(HttpServletResponse.SC_OK);

    Writer out = response.getWriter();
    if (out == null) {
        throw new ServletException("response returned a null writer");
    }

    out.write(content);
    out.flush();
    out.close();
}
/**
 * Writes a small JSON object of the form
 * {@code { "status": ..., "message": ... }} to the response, then flushes
 * and closes the writer.
 *
 * @param response the response to write to
 * @param status   value for the "status" key
 * @param message  value for the "message" key
 * @throws IOException if the response writer cannot be obtained, flushed or closed
 */
static public void respond(HttpServletResponse response, String status, String message)
        throws IOException {

    Writer w = response.getWriter();
    try {
        JSONWriter writer = new JSONWriter(w);
        writer.object();
        writer.key("status"); writer.value(status);
        writer.key("message"); writer.value(message);
        writer.endObject();
        w.flush();
        w.close();
    } catch (JSONException e) {
        // Not expected for this fixed key/value sequence, but JSONWriter reports
        // its failures as JSONException, so record it instead of dropping it.
        // The method stays best-effort: nothing is rethrown to the caller.
        logger.warn("Failed to write JSON status response", e);
    }
}
/**
 * Serializes the object as JSON into the response, using an empty set of
 * serialization options.
 *
 * @param response the response to write to
 * @param o        the object to serialize
 * @throws IOException   if writing to the response fails
 * @throws JSONException if the object cannot be serialized
 */
static public void respondJSON(HttpServletResponse response, Jsonizable o)
        throws IOException, JSONException {

    Properties noOptions = new Properties();
    respondJSON(response, o, noOptions);
}
static public void respondJSON(
HttpServletResponse response, Jsonizable o, Properties options)
throws IOException, JSONException {
response.setCharacterEncoding("UTF-8");
response.setHeader("Content-Type", "application/json");
Writer w = response.getWriter();
JSONWriter writer = new JSONWriter(w);
o.write(writer, options);
w.flush();
w.close();
}
static public void respondException(HttpServletResponse response, Exception e)
throws IOException, ServletException {
logger.warn("Exception caught", e);
if (response == null) {
throw new ServletException("Response object can't be null");
}
try {
JSONObject o = new JSONObject();
o.put("code", "error");
o.put("message", e.getMessage());
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
e.printStackTrace(pw);
pw.flush();
sw.flush();
o.put("stack", sw.toString());
response.setCharacterEncoding("UTF-8");
response.setHeader("Content-Type", "application/json");
respond(response, o.toString());
} catch (JSONException e1) {
e.printStackTrace(response.getWriter());
}
}
static public void redirect(HttpServletResponse response, String url) throws IOException {
response.sendRedirect(url);
}
static public int getIntegerParameter(HttpServletRequest request, String name, int def) {
if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null");
try {
return Integer.parseInt(request.getParameter(name));
} catch (Exception e) {
logger.warn("Error getting integer parameter", e);
}
return def;
}
static public JSONObject getJsonParameter(HttpServletRequest request, String name) {
if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null");
String value = request.getParameter(name);
if (value != null) {
try {
return ParsingUtilities.evaluateJsonStringToObject(value);
} catch (JSONException e) {
logger.warn("Error getting json parameter", e);
}
}
return null;
}
static public void respondWithErrorPage(
RefineServlet servlet,
HttpServletRequest request,
HttpServletResponse response,
String message,
Throwable e
) {
respondWithErrorPage(servlet, request, response, message,
HttpServletResponse.SC_INTERNAL_SERVER_ERROR, e);
}
static public void respondWithErrorPage(
RefineServlet servlet,
HttpServletRequest request,
HttpServletResponse response,
String message,
int status,
Throwable e
) {
VelocityContext context = new VelocityContext();
context.put("message", message);
if (e != null) {
StringWriter writer = new StringWriter();
e.printStackTrace(new PrintWriter(writer));
context.put("stack", writer.toString());
} else {
context.put("stack", "");
}
try {
response.setStatus(status);
servlet.getModule("core").sendTextFromTemplate(
request, response, context, "error.vt", "UTF-8", "text/html", true);
} catch (Exception e1) {
e1.printStackTrace();
}
}
}

View File

@ -0,0 +1,61 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.commands.importing;
import java.io.IOException;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import com.google.refine.commands.Command;
import com.google.refine.commands.HttpUtilities;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingManager;
public class CancelImportingJobCommand extends Command {
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
long jobID = Long.parseLong(request.getParameter("jobID"));
ImportingJob job = ImportingManager.getJob(jobID);
if (job == null) {
HttpUtilities.respond(response, "error", "No such import job");
} else {
job.canceled = true;
HttpUtilities.respond(response, "ok", "Job canceled");
}
}
}

View File

@ -43,8 +43,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.commands.Command;
import com.google.refine.importing.ImportingManager;
public class CreateImportJobCommand extends Command {
public class CreateImportingJobCommand extends Command {
final static Logger logger = LoggerFactory.getLogger("create-import-job_command");
@ -52,7 +53,7 @@ public class CreateImportJobCommand extends Command {
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
long id = ImportManager.singleton().createJob().id;
long id = ImportingManager.createJob().id;
response.setCharacterEncoding("UTF-8");
response.setHeader("Content-Type", "application/json");

View File

@ -1,6 +1,6 @@
/*
Copyright 2010, Google Inc.
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -31,24 +31,38 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers;
package com.google.refine.commands.importing;
import java.io.InputStream;
import java.io.IOException;
import java.io.Writer;
import java.util.Properties;
import com.google.refine.ProjectMetadata;
import com.google.refine.model.Project;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
public interface StreamImporter extends Importer {
import org.json.JSONException;
import org.json.JSONWriter;
/**
* @param inputStream stream to be imported
* @param project project to import stream into
* @param metadata metadata of new project
* @param options
* @throws ImportException
*/
public void read(InputStream inputStream, Project project,
ProjectMetadata metadata, Properties options) throws ImportException;
import com.google.refine.commands.Command;
import com.google.refine.importing.ImportingManager;
/**
 * Command that returns the importing configuration as JSON, in the form
 * <code>{ "config" : ... }</code> where the value is whatever
 * {@code ImportingManager.writeConfiguration} emits.
 */
public class GetImportingConfigurationCommand extends Command {

    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {

        // Declare the payload as UTF-8 JSON before the writer is obtained,
        // matching the other JSON-producing importing commands.
        response.setCharacterEncoding("UTF-8");
        response.setHeader("Content-Type", "application/json");

        Writer w = response.getWriter();
        JSONWriter writer = new JSONWriter(w);
        try {
            writer.object();
            writer.key("config"); ImportingManager.writeConfiguration(writer, new Properties());
            writer.endObject();
        } catch (JSONException e) {
            throw new ServletException(e);
        } finally {
            w.flush();
            w.close();
        }
    }
}

View File

@ -34,9 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.commands.importing;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Properties;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
@ -44,22 +43,18 @@ import javax.servlet.http.HttpServletResponse;
import org.json.JSONException;
import org.json.JSONWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.commands.Command;
import com.google.refine.commands.importing.ImportJob.State;
public class GetImportJobStatusCommand extends Command {
final static Logger logger = LoggerFactory.getLogger("get-import-job-status_command");
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingManager;
public class GetImportingJobStatusCommand extends Command {
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
long jobID = Long.parseLong(request.getParameter("jobID"));
ImportJob job = ImportManager.singleton().getJob(jobID);
ImportingJob job = ImportingManager.getJob(jobID);
Writer w = response.getWriter();
JSONWriter writer = new JSONWriter(w);
@ -70,32 +65,11 @@ public class GetImportJobStatusCommand extends Command {
writer.key("message"); writer.value("No such import job");
} else {
writer.key("code"); writer.value("ok");
writer.key("state");
if (job.state == State.NEW) {
writer.value("new");
} else if (job.state == State.RETRIEVING_DATA) {
writer.value("retrieving");
writer.key("progress"); writer.value(job.retrievingProgress);
writer.key("bytesSaved"); writer.value(job.bytesSaved);
} else if (job.state == State.READY) {
writer.value("ready");
} else if (job.state == State.ERROR) {
writer.value("error");
writer.key("message"); writer.value(job.errorMessage);
if (job.exception != null) {
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
job.exception.printStackTrace(pw);
pw.flush();
sw.flush();
writer.key("stack"); writer.value(sw.toString());
}
}
writer.key("job"); job.write(writer, new Properties());
}
writer.endObject();
} catch (JSONException e) {
throw new IOException(e);
throw new ServletException(e);
} finally {
w.flush();
w.close();

View File

@ -1,49 +0,0 @@
package com.google.refine.commands.importing;
import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import com.google.refine.model.meta.ImportSource;
/**
 * Mutable state holder for a single import job: its id, its working
 * directory, the phase it is in, and progress or error details.
 */
public class ImportJob {

    /** Phases a job can be in. */
    static public enum State {
        NEW,
        RETRIEVING_DATA,
        READY,
        ERROR
    }

    final public long id;
    final public File dir;

    public long lastTouched;
    public State state = State.NEW;

    // Data for retrieving phase
    public int retrievingProgress = 0; // from 0 to 100
    public long bytesSaved = 0; // in case percentage is unknown
    public String errorMessage;
    public Throwable exception;

    public ImportSource importSource;

    /**
     * Creates the job and makes sure its working directory exists.
     *
     * @param id the job id
     * @param dir the job's working directory; created if missing
     */
    public ImportJob(long id, File dir) {
        this.id = id;
        this.dir = dir;

        this.dir.mkdirs();
    }

    /** Records the current time in {@link #lastTouched}. */
    public void touch() {
        this.lastTouched = System.currentTimeMillis();
    }

    /** Deletes the job's working directory; failures are ignored. */
    public void dispose() {
        try {
            FileUtils.deleteDirectory(this.dir);
        } catch (IOException ignored) {
        }
    }
}

View File

@ -1,101 +0,0 @@
package com.google.refine.commands.importing;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import com.google.refine.RefineServlet;
import com.google.refine.model.meta.ImportSource;
/**
 * Registry of import source classes (static part) plus a singleton that
 * creates, looks up and disposes {@link ImportJob}s, each with its own
 * directory under a shared import directory.
 */
public class ImportManager {

    static final private Map<String, Class<? extends ImportSource>> nameToImportSourceClass =
        new HashMap<String, Class<? extends ImportSource>>();

    static final private Map<String, String> importSourceClassNameToName =
        new HashMap<String, String>();

    /**
     * Registers an import source class under a name.
     *
     * @param name name to register the class under
     * @param klass the import source class
     *
     * @return true if the class was registered, false if {@code name} was
     *         already taken (the existing registration is kept)
     */
    static public boolean registerImportSourceClass(String name, Class<? extends ImportSource> klass) {
        if (!nameToImportSourceClass.containsKey(name)) {
            nameToImportSourceClass.put(name, klass);
            importSourceClassNameToName.put(klass.getName(), name);
            return true;
        }
        return false;
    }

    /** Returns the class registered under {@code name}, or null. */
    static public Class<? extends ImportSource> getImportSourceClass(String name) {
        return nameToImportSourceClass.get(name);
    }

    /** Returns the name {@code klass} was registered under, or null. */
    static public String getImportSourceClassName(Class<? extends ImportSource> klass) {
        String className = klass.getName();
        return importSourceClassNameToName.get(className);
    }

    final private RefineServlet servlet;
    final private Map<Long, ImportJob> jobs = new HashMap<Long, ImportJob>();
    private File importDir;

    static private ImportManager singleton;

    /** Replaces the singleton with a new manager bound to {@code servlet}. */
    static public void initialize(RefineServlet servlet) {
        singleton = new ImportManager(servlet);
    }

    /** Returns the current singleton; null until {@link #initialize} ran. */
    static public ImportManager singleton() {
        return singleton;
    }

    private ImportManager(RefineServlet servlet) {
        this.servlet = servlet;
    }

    /**
     * Lazily resolves the shared import directory. On first use any existing
     * directory is wiped so that each run starts fresh.
     */
    private File getImportDir() {
        if (importDir != null) {
            return importDir;
        }

        File tempDir = servlet.getTempDir();
        if (tempDir == null) {
            importDir = new File(".import-temp");
        } else {
            importDir = new File(tempDir, "import");
        }

        if (importDir.exists()) {
            try {
                // start fresh
                FileUtils.deleteDirectory(importDir);
            } catch (IOException ignored) {
            }
        }
        importDir.mkdirs();

        return importDir;
    }

    /**
     * Creates and registers a new job whose id is the current time in
     * milliseconds plus a random offset below one million.
     */
    public ImportJob createJob() {
        long id = System.currentTimeMillis() + (long) (Math.random() * 1000000);
        ImportJob created = new ImportJob(id, new File(getImportDir(), Long.toString(id)));

        jobs.put(id, created);

        return created;
    }

    /** Returns the job with the given id, or null. */
    public ImportJob getJob(long id) {
        return jobs.get(id);
    }

    /** Disposes and unregisters the job with the given id, if any. */
    public void disposeJob(long id) {
        ImportJob doomed = getJob(id);
        if (doomed == null) {
            return;
        }
        doomed.dispose();
        jobs.remove(id);
    }
}

View File

@ -44,18 +44,40 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.commands.Command;
import com.google.refine.commands.importing.ImportJob.State;
import com.google.refine.model.meta.ImportSource;
import com.google.refine.commands.HttpUtilities;
import com.google.refine.importing.ImportingController;
import com.google.refine.importing.ImportingManager;
import com.google.refine.util.ParsingUtilities;
public class RetrieveImportContentCommand extends Command {
public class ImportingControllerCommand extends Command {
final static Logger logger = LoggerFactory.getLogger("retrieve-import-content_command");
final static Logger logger = LoggerFactory.getLogger("importing-controller_command");
/**
 * Hands the POST request to the importing controller selected by the
 * request, or answers with a JSON error status when none is found.
 */
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response)
    throws ServletException, IOException {

    ImportingController target = getController(request);
    if (target == null) {
        HttpUtilities.respond(response, "error", "No such import controller");
        return;
    }
    target.doPost(request, response);
}
/**
 * Hands the GET request to the importing controller selected by the
 * request, or answers with a JSON error status when none is found.
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response)
    throws ServletException, IOException {

    ImportingController target = getController(request);
    if (target == null) {
        HttpUtilities.respond(response, "error", "No such import controller");
        return;
    }
    // NOTE(review): GET is forwarded to the controller's doPost, not to a
    // GET-specific handler. Confirm this is intentional.
    target.doPost(request, response);
}
private ImportingController getController(HttpServletRequest request) {
/*
* The uploaded file is in the POST body as a "file part". If
* we call request.getParameter() then the POST body will get
@ -64,39 +86,10 @@ public class RetrieveImportContentCommand extends Command {
* Don't call request.getParameter() before calling internalImport().
*/
Properties options = ParsingUtilities.parseUrlParameters(request);
long jobID = Long.parseLong(options.getProperty("jobID"));
ImportJob job = ImportManager.singleton().getJob(jobID);
if (job == null) {
respondWithErrorPage(request, response, "No such import job", null);
return;
} else if (job.state != State.NEW) {
respondWithErrorPage(request, response, "Import job already started", null);
return;
}
Class<? extends ImportSource> importSourceClass =
ImportManager.getImportSourceClass(options.getProperty("source"));
if (importSourceClass == null) {
respondWithErrorPage(request, response, "No such import source class", null);
return;
}
try {
ImportSource importSource = importSourceClass.newInstance();
job.importSource = importSource;
job.state = State.RETRIEVING_DATA;
importSource.retrieveContent(request, options, job);
job.retrievingProgress = 100;
job.state = State.READY;
} catch (Throwable e) {e.printStackTrace();
job.state = State.ERROR;
job.errorMessage = e.getLocalizedMessage();
job.exception = e;
respondWithErrorPage(request, response, "Failed to kick start import job", e);
String name = options.getProperty("controller");
if (name != null) {
return ImportingManager.controllers.get(name);
}
return null;
}
}

View File

@ -1,6 +1,6 @@
/*
Copyright 2010,2011. Google Inc.
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -33,60 +33,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.commands.project;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.fileupload.FileItemIterator;
import org.apache.commons.fileupload.FileItemStream;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.apache.commons.fileupload.util.Streams;
import org.apache.tools.bzip2.CBZip2InputStream;
import org.apache.tools.tar.TarEntry;
import org.apache.tools.tar.TarInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.commands.Command;
import com.google.refine.importers.Importer;
import com.google.refine.importers.ImporterRegistry;
import com.google.refine.importers.ReaderImporter;
import com.google.refine.importers.StreamImporter;
import com.google.refine.importers.TsvCsvImporter;
import com.google.refine.importers.UrlImporter;
import com.google.refine.commands.HttpUtilities;
import com.google.refine.model.Project;
import com.google.refine.util.IOUtils;
import com.google.refine.util.ParsingUtilities;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
public class CreateProjectCommand extends Command {
@ -99,13 +61,6 @@ public class CreateProjectCommand extends Command {
ProjectManager.singleton.setBusy(true);
try {
/*
* Set UTF-8 as request encoding, then ServletFileUpload will use it as default encoding
*/
if (request.getCharacterEncoding() == null) {
request.setCharacterEncoding("UTF-8");
}
/*
* The uploaded file is in the POST body as a "file part". If
* we call request.getParameter() then the POST body will get
@ -118,7 +73,7 @@ public class CreateProjectCommand extends Command {
Project project = new Project();
ProjectMetadata pm = new ProjectMetadata();
internalImport(request, project, pm, options);
//internalImport(request, project, pm, options);
/*
* The import process above populates options with parameters
@ -133,382 +88,11 @@ public class CreateProjectCommand extends Command {
project.update();
redirect(response, "/project?project=" + project.id);
HttpUtilities.redirect(response, "/project?project=" + project.id);
} catch (Exception e) {
respondWithErrorPage(request, response, "Failed to import file", e);
} finally {
ProjectManager.singleton.setBusy(false);
}
}
/**
 * Walks the parts of a multipart upload and imports data from them.
 *
 * File parts with a non-empty name are imported through
 * {@code internalImportFile}. A "raw-text" form field is imported with a
 * {@code TsvCsvImporter}. A "project-url" form field overrides the "url"
 * option. Every other form field is copied into {@code options} under its
 * lower-cased name. If nothing was imported and a URL is known, the URL is
 * imported instead.
 *
 * @param request the multipart request
 * @param project project to populate
 * @param metadata metadata of the new project
 * @param options import options; extended with the form fields
 * @throws Exception if parsing the upload or importing fails
 */
protected void internalImport(
    HttpServletRequest request,
    Project project,
    ProjectMetadata metadata,
    Properties options
) throws Exception {

    String url = options.getProperty("url");
    boolean imported = false;

    FileItemIterator parts = new ServletFileUpload().getItemIterator(request);
    while (parts.hasNext()) {
        FileItemStream part = parts.next();
        String fieldName = part.getFieldName().toLowerCase();
        InputStream partStream = part.openStream();

        if (!part.isFormField()) {
            String fileName = part.getName().toLowerCase();
            if (fileName.length() > 0) {
                try {
                    internalImportFile(project, metadata, options, fileName, partStream);
                    imported = true;
                } finally {
                    partStream.close();
                }
            }
            continue;
        }

        if (fieldName.equals("raw-text")) {
            Reader rawText = new InputStreamReader(partStream, request.getCharacterEncoding());
            try {
                internalInvokeImporter(project, new TsvCsvImporter(), metadata, options, rawText);
                imported = true;
            } finally {
                rawText.close();
            }
        } else if (fieldName.equals("project-url")) {
            url = Streams.asString(partStream, request.getCharacterEncoding());
        } else {
            String fieldValue = Streams.asString(partStream, request.getCharacterEncoding());
            options.put(fieldName, fieldValue);
        }
    }

    boolean haveUrl = url != null && url.length() > 0;
    if (!imported && haveUrl) {
        internalImportURL(request, project, metadata, options, url);
    }
}
/**
 * Stream wrapper whose {@link #close()} does nothing, so that code handed
 * the wrapper cannot close the wrapped stream. The owner closes it for real
 * through {@link #reallyClose()}.
 */
static class SafeInputStream extends FilterInputStream {

    public SafeInputStream(InputStream delegate) {
        super(delegate);
    }

    /**
     * Intentionally a no-op: some libraries close the stream they were given
     * once they cannot read any more from it, which would break reading the
     * remaining entries of a zip stream. The owner closes the stream later
     * via {@link #reallyClose()}.
     */
    @Override
    public void close() {
    }

    /** Closes the wrapped stream. */
    public void reallyClose() throws IOException {
        super.close();
    }
}
/**
 * Imports one uploaded file, unpacking archives and compressed files first.
 *
 * Dispatch is by file name suffix:
 * - .zip, .tar, .tar.gz, .tgz, .tar.bz2: the stream is saved to a temporary
 *   file and read twice. The first pass counts how often each entry
 *   extension occurs; the second pass recursively imports only the entries
 *   whose extension is the most frequent one (or ties for most frequent).
 * - .gz, .bz2: the stream is decompressed and imported recursively under the
 *   file name with its last extension removed.
 * - anything else: handed to load().
 *
 * @param project project to populate
 * @param metadata metadata of the new project
 * @param options import options
 * @param fileName name used to pick the handling above; callers in this
 *        class pass it lower-cased for uploads and as-is for archive entries
 * @param inputStream content of the file
 * @throws Exception if reading or importing fails; a RuntimeException is
 *         thrown when an archive contains no files
 */
protected void internalImportFile(
Project project,
ProjectMetadata metadata,
Properties options,
String fileName,
InputStream inputStream
) throws Exception {
logger.info("Importing '{}'", fileName);
if (fileName.endsWith(".zip") || fileName.endsWith(".tar") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz") || fileName.endsWith(".tar.bz2")) {
// first, save the file on disk, since we need two passes and we might
// not have enough memory to keep it all in there
File file = save(inputStream);
// in the first pass, gather statistics about what files are in there
// unfortunately, we have to rely on files extensions, which is horrible but
// better than nothing
HashMap<String,Integer> ext_map = new HashMap<String,Integer>();
FileInputStream fis = new FileInputStream(file);
InputStream is = getStream(fileName, fis);
// NOTE(SM): unfortunately, java.io does not provide any generalized class for
// archive-like input streams so while both TarInputStream and ZipInputStream
// behave precisely the same, there is no polymorphic behavior so we have
// to treat each instance explicitly... one of those times you wish you had
// closures
try {
if (is instanceof TarInputStream) {
TarInputStream tis = (TarInputStream) is;
TarEntry te;
while ((te = tis.getNextEntry()) != null) {
if (!te.isDirectory()) {
mapExtension(te.getName(),ext_map);
}
}
} else if (is instanceof ZipInputStream) {
ZipInputStream zis = (ZipInputStream) is;
ZipEntry ze;
while ((ze = zis.getNextEntry()) != null) {
if (!ze.isDirectory()) {
mapExtension(ze.getName(),ext_map);
}
}
}
} finally {
// best-effort cleanup of the first-pass streams; close failures are ignored
try {
is.close();
fis.close();
} catch (IOException e) {}
}
// sort extensions by how often they appear
List<Entry<String,Integer>> values = new ArrayList<Entry<String,Integer>>(ext_map.entrySet());
Collections.sort(values, new ValuesComparator());
if (values.size() == 0) {
throw new RuntimeException("The archive contains no files.");
}
// this will contain the set of extensions we'll load from the archive
HashSet<String> exts = new HashSet<String>();
// find the extension that is most frequent or those who share the highest frequency value
if (values.size() == 1) {
exts.add(values.get(0).getKey());
} else {
Entry<String,Integer> most_frequent = values.get(0);
Entry<String,Integer> second_most_frequent = values.get(1);
if (most_frequent.getValue() > second_most_frequent.getValue()) { // we have a winner
exts.add(most_frequent.getKey());
} else { // multiple extensions have the same frequency
int winning_frequency = most_frequent.getValue();
for (Entry<String,Integer> e : values) {
// winning_frequency is a primitive int, so this compares values, not Integer references
if (e.getValue() == winning_frequency) {
exts.add(e.getKey());
}
}
}
}
logger.info("Most frequent extensions: {}", exts.toString());
// second pass, load the data for real
is = getStream(fileName, new FileInputStream(file));
// entries are read through a wrapper that ignores close(), so a recursive
// import cannot close the archive stream before all entries are visited
SafeInputStream sis = new SafeInputStream(is);
try {
if (is instanceof TarInputStream) {
TarInputStream tis = (TarInputStream) is;
TarEntry te;
while ((te = tis.getNextEntry()) != null) {
if (!te.isDirectory()) {
String name = te.getName();
String ext = getExtension(name)[1];
if (exts.contains(ext)) {
internalImportFile(project, metadata, options, name, sis);
}
}
}
} else if (is instanceof ZipInputStream) {
ZipInputStream zis = (ZipInputStream) is;
ZipEntry ze;
while ((ze = zis.getNextEntry()) != null) {
if (!ze.isDirectory()) {
String name = ze.getName();
String ext = getExtension(name)[1];
if (exts.contains(ext)) {
internalImportFile(project, metadata, options, name, sis);
}
}
}
}
} finally {
// now actually close the archive stream; close failures are ignored
try {
sis.reallyClose();
} catch (IOException e) {}
}
} else if (fileName.endsWith(".gz")) {
// getExtension(fileName)[0] is the name without its last extension
internalImportFile(project, metadata, options, getExtension(fileName)[0], new GZIPInputStream(inputStream));
} else if (fileName.endsWith(".bz2")) {
internalImportFile(project, metadata, options, getExtension(fileName)[0], new CBZip2InputStream(inputStream));
} else {
load(project, metadata, options, fileName, inputStream);
}
}
/**
 * Orders map entries by value, largest first.
 */
public static class ValuesComparator implements Comparator<Entry<String,Integer>>, Serializable {
    private static final long serialVersionUID = 8845863616149837657L;

    /**
     * @return a negative number if {@code o1} has the larger value, a
     *         positive number if {@code o2} has the larger value, zero if
     *         the values are equal
     */
    public int compare(Entry<String,Integer> o1, Entry<String,Integer> o2) {
        // Compare instead of subtracting: a difference can overflow int and
        // flip the sign of the result.
        return o2.getValue().compareTo(o1.getValue());
    }
}
/**
 * Imports a plain (non-archive) file: picks an importer from the file name
 * alone and runs it on the stream without a declared encoding.
 */
private void load(Project project, ProjectMetadata metadata, Properties options, String fileName, InputStream inputStream) throws Exception {
    // No content type is known here, so only the file name guides the guess.
    internalInvokeImporter(
        project,
        ImporterRegistry.guessImporter(null, fileName),
        metadata,
        options,
        inputStream,
        null);
}
/**
 * Copies a stream into a temporary file obtained from the servlet and
 * closes the stream, whether or not the copy succeeds.
 *
 * The file is named after the current time in milliseconds and is marked
 * for deletion when the JVM exits.
 *
 * @param is the stream to save; closed by this method
 * @return the temporary file holding the stream's content
 * @throws IOException if copying or closing fails
 */
private File save(InputStream is) throws IOException {
    File temp = this.servlet.getTempFile(Long.toString(System.currentTimeMillis()));
    temp.deleteOnExit();
    try {
        IOUtils.copy(is,temp);
    } finally {
        // Release the source even if the copy failed part-way.
        is.close();
    }
    return temp;
}
/**
 * Increments the occurrence count of the extension of {@code name} in
 * {@code ext_map}, starting at 1 for an extension not seen before.
 */
private void mapExtension(String name, Map<String,Integer> ext_map) {
    String extension = getExtension(name)[1];
    int occurrences = ext_map.containsKey(extension) ? ext_map.get(extension) + 1 : 1;
    ext_map.put(extension, occurrences);
}
/**
 * Wraps {@code is} in the archive stream matching the file name suffix:
 * a tar stream for .tar, a gunzipped tar stream for .tar.gz and .tgz, a
 * bunzipped tar stream for .tar.bz2, and a zip stream for anything else.
 */
private InputStream getStream(String fileName, InputStream is) throws IOException {
    // The suffixes are mutually exclusive, so the order of the checks is free.
    if (fileName.endsWith(".tar")) {
        return new TarInputStream(is);
    }
    if (fileName.endsWith(".tar.bz2")) {
        return new TarInputStream(new CBZip2InputStream(is));
    }
    boolean gzippedTar = fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz");
    if (gzippedTar) {
        return new TarInputStream(new GZIPInputStream(is));
    }
    return new ZipInputStream(is);
}
/**
 * Splits a file name (optionally with leading directories) at its last
 * extension.
 *
 * Only a dot inside the final path component counts, so a dot in a
 * directory name, as in "my.dir/README", is not treated as an extension
 * separator.
 *
 * @param filename the name or path to split
 * @return a two-element array: [0] is {@code filename} without its last
 *         extension, [1] is the extension without the dot, or "" if the
 *         final path component has no dot
 */
private String[] getExtension(String filename) {
    String[] result = new String[2];
    int separator_index = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
    int ext_index = filename.lastIndexOf('.');
    if (ext_index < separator_index) {
        // The last dot belongs to a directory, not to the file itself.
        ext_index = -1;
    }
    result[0] = (ext_index == -1) ? filename : filename.substring(0,ext_index);
    result[1] = (ext_index == -1) ? "" : filename.substring(ext_index + 1);
    return result;
}
/**
 * Imports data from a URL.
 *
 * If the importer registry offers a {@code UrlImporter} for the URL, that
 * importer reads the URL itself. Otherwise the URL is opened (5 second
 * connect timeout) and its content is imported as a stream, with the
 * importer guessed from the response content type and the URL path.
 *
 * @param request the current request
 * @param project project to populate
 * @param metadata metadata of the new project
 * @param options import options
 * @param urlString the URL to import from
 * @throws Exception if the URL is malformed, cannot be connected to or
 *         read, or if importing fails
 */
protected void internalImportURL(
    HttpServletRequest request,
    Project project,
    ProjectMetadata metadata,
    Properties options,
    String urlString) throws Exception {

    // Little dance to get URL properly encoded (e.g. for funky Fusion Tables queries)
    URL url = new URL(urlString);
    // This URI constructor takes an authority, not a bare host: pass the full
    // authority so an explicit port or user info in the URL is preserved.
    url = new URI(url.getProtocol(), url.getAuthority(), url.getPath(), url.getQuery(), null).toURL();

    URLConnection connection = null;

    // Try for a URL importer first
    Importer importer = ImporterRegistry.guessUrlImporter(url);
    if (importer instanceof UrlImporter) {
        ((UrlImporter) importer).read(url, project, metadata, options);
    } else {
        // If we couldn't find one, try opening URL and treating as a stream
        try {
            connection = url.openConnection();
            connection.setConnectTimeout(5000);
            connection.connect();
        } catch (Exception e) {
            throw new Exception("Cannot connect to " + urlString, e);
        }

        InputStream inputStream = null;
        try {
            inputStream = connection.getInputStream();
        } catch (Exception e) {
            throw new Exception("Cannot retrieve content from " + url, e);
        }

        try {
            // The content type may be unknown (null); in that case the
            // importer is guessed from the URL path alone.
            String contentType = connection.getContentType();
            if (contentType != null) {
                int semicolon = contentType.indexOf(';');
                if (semicolon >= 0) {
                    // strip parameters such as "; charset=..."
                    contentType = contentType.substring(0, semicolon);
                }
            }

            importer = ImporterRegistry.guessImporter(contentType, url.getPath());

            internalInvokeImporter(project, importer, metadata, options, inputStream, connection.getContentEncoding());
        } finally {
            inputStream.close();
        }
    }
}
/**
 * Runs an importer on a raw byte stream.
 *
 * For a {@code ReaderImporter} the character encoding is detected with
 * ICU4J: the first candidate with confidence of at least 20 that Java
 * supports is used, otherwise {@code encoding} (or the platform default
 * when that is null). The encoding actually used and the detection
 * confidence are stored in {@code options} under "encoding" and
 * "encoding_confidence". Any other importer is treated as a
 * {@code StreamImporter} and receives the raw stream.
 *
 * @param project project to populate
 * @param importer the importer to run
 * @param metadata metadata of the new project
 * @param options import options; updated with the encoding properties
 * @param rawInputStream the content to import
 * @param encoding fallback encoding, may be null
 * @throws Exception if importing fails
 */
protected void internalInvokeImporter(
    Project project,
    Importer importer,
    ProjectMetadata metadata,
    Properties options,
    InputStream rawInputStream,
    String encoding
) throws Exception {
    if (!(importer instanceof ReaderImporter)) {
        // TODO: How do we set character encoding here?
        // Things won't work right if it's not set, so pick some arbitrary values
        if (encoding != null) {
            options.setProperty("encoding", encoding);
        }
        options.setProperty("encoding_confidence", "0");
        ((StreamImporter) importer).read(rawInputStream, project, metadata, options);
        return;
    }

    // NOTE: The ICU4J char detection code requires the input stream to support mark/reset.
    InputStream markable = rawInputStream.markSupported()
        ? rawInputStream
        : new BufferedInputStream(rawInputStream);

    CharsetDetector detector = new CharsetDetector();
    detector.setDeclaredEncoding("utf8"); // most of the content on the web is encoded in UTF-8 so start with that

    options.setProperty("encoding_confidence", "0"); // in case we don't find anything suitable

    InputStreamReader reader = null;
    // matches are ordered - first is best match
    for (CharsetMatch candidate : detector.setText(markable).detectAll()) {
        String charsetName = candidate.getName();
        int confidence = candidate.getConfidence();

        // Threshold was 50. Do we ever want to not use our best guess even if it's low confidence? - tfmorris
        if (confidence < 20) {
            logger.debug("Poor encoding guess: {} [confidence: {}]; skipping", charsetName, confidence);
            continue;
        }

        logger.info("Encoding guess: {} [confidence: {}]", charsetName, confidence);
        try {
            reader = new InputStreamReader(markable, charsetName);
        } catch (UnsupportedEncodingException e) {
            logger.debug("Unsupported InputStreamReader charset encoding: {} [confidence: {}]; skipping", charsetName, confidence);
            continue;
        }
        // Encoding will be set later at common exit point
        options.setProperty("encoding_confidence", Integer.toString(confidence));
        break;
    }

    if (reader == null) { // when all else fails
        reader = (encoding != null)
            ? new InputStreamReader(markable, encoding)
            : new InputStreamReader(markable);
    }

    // Get the actual encoding which will be used and save it for project metadata
    options.setProperty("encoding", reader.getEncoding());

    ((ReaderImporter) importer).read(reader, project, metadata, options);
}
/**
 * Runs a reader-based importer on already decoded character data.
 *
 * @param project project to populate
 * @param importer the importer to run
 * @param metadata metadata of the new project
 * @param options import options
 * @param reader the content to import
 * @throws Exception if importing fails
 */
protected void internalInvokeImporter(
    Project project,
    ReaderImporter importer,
    ProjectMetadata metadata,
    Properties options,
    Reader reader
) throws Exception {
    // Note the argument order expected by the importer: reader first.
    importer.read(reader, project, metadata, options);
}
}

View File

@ -44,17 +44,43 @@ import org.json.JSONException;
import org.json.JSONWriter;
import com.google.refine.commands.Command;
import com.google.refine.commands.HttpUtilities;
import com.google.refine.expr.MetaParser;
import com.google.refine.expr.MetaParser.LanguageInfo;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingManager;
import com.google.refine.model.OverlayModel;
import com.google.refine.model.Project;
public class GetModelsCommand extends Command {
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response)
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
internalRespond(request, response);
}
/**
 * Handles GET by delegating to {@code internalRespond}.
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response)
    throws ServletException, IOException {

    internalRespond(request, response);
}
protected void internalRespond(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
Project project = getProject(request);
Project project = null;
// This command also supports retrieving rows for an importing job.
String importingJobID = request.getParameter("importingJobID");
if (importingJobID != null) {
long jobID = Long.parseLong(importingJobID);
ImportingJob job = ImportingManager.getJob(jobID);
if (job != null) {
project = job.project;
}
}
if (project == null) {
project = getProject(request);
}
try {
response.setCharacterEncoding("UTF-8");
@ -92,7 +118,7 @@ public class GetModelsCommand extends Command {
writer.endObject();
} catch (JSONException e) {
respondException(response, e);
HttpUtilities.respondException(response, e);
}
}

View File

@ -52,6 +52,8 @@ import com.google.refine.browsing.RecordVisitor;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.browsing.Engine.Mode;
import com.google.refine.commands.Command;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingManager;
import com.google.refine.model.Project;
import com.google.refine.model.Record;
import com.google.refine.model.Row;
@ -77,7 +79,21 @@ public class GetRowsCommand extends Command {
throws ServletException, IOException {
try {
Project project = getProject(request);
Project project = null;
// This command also supports retrieving rows for an importing job.
String importingJobID = request.getParameter("importingJobID");
if (importingJobID != null) {
long jobID = Long.parseLong(importingJobID);
ImportingJob job = ImportingManager.getJob(jobID);
if (job != null) {
project = job.project;
}
}
if (project == null) {
project = getProject(request);
}
Engine engine = getEngine(request, project);
String callback = request.getParameter("callback");

View File

@ -33,16 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.poi.common.usermodel.Hyperlink;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
@ -51,184 +50,152 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.json.JSONArray;
import org.json.JSONObject;
import com.google.refine.ProjectMetadata;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.Row;
import com.google.refine.model.Recon.Judgment;
import com.google.refine.util.JSONUtilities;
public class ExcelImporter implements StreamImporter {
protected boolean _xmlBased;
public class ExcelImporter extends TabularImportingParserBase {
/**
 * Creates the Excel importer.
 */
public ExcelImporter() {
    // NOTE(review): presumably 'true' selects InputStream-based parsing in
    // TabularImportingParserBase -- confirm against the base class.
    super(true);
}
@Override
public void read(InputStream inputStream, Project project, ProjectMetadata metadata, Properties options) throws ImportException {
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
int limit = ImporterUtilities.getIntegerOption("limit", options, -1);
int skip = ImporterUtilities.getIntegerOption("skip", options, 0);
/**
 * Builds the initial options shown by the Excel parser UI.
 *
 * On top of whatever the superclass provides, the result carries:
 * "xmlBased" (true when {@code format} is "text/xml/xlsx") and "sheetRecords",
 * one record per sheet of the first file with its "name", its "rows" count and,
 * for most sheets, a "selected" flag. The first sheet with more than one row is
 * marked selected; every later sheet is marked unselected; sheets before it
 * carry no "selected" key at all.
 *
 * Listing the sheets is best effort: if there is no file record, or the
 * workbook cannot be opened, "sheetRecords" is returned with whatever was
 * collected so far (possibly nothing) instead of failing the whole request.
 *
 * @param job         importing job that owns the uploaded files
 * @param fileRecords records of the files being imported; only the first is inspected
 * @param format      format identifier chosen for these files
 * @return the options object for the parser UI
 */
public JSONObject createParserUIInitializationData(
        ImportingJob job, List<JSONObject> fileRecords, String format) {
    JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);

    boolean xmlBased = "text/xml/xlsx".equals(format);
    JSONUtilities.safePut(options, "xmlBased", xmlBased);

    JSONArray sheetRecords = new JSONArray();
    JSONUtilities.safePut(options, "sheetRecords", sheetRecords);

    // Nothing to inspect: return the defaults rather than throwing from get(0).
    if (fileRecords == null || fileRecords.isEmpty()) {
        return options;
    }

    try {
        JSONObject firstFileRecord = fileRecords.get(0);
        File file = ImportingUtilities.getFile(job, firstFileRecord);
        InputStream is = new FileInputStream(file);
        try {
            Workbook wb = xmlBased ?
                new XSSFWorkbook(is) :
                new HSSFWorkbook(new POIFSFileSystem(is));

            int sheetCount = wb.getNumberOfSheets();
            boolean hasData = false;
            for (int i = 0; i < sheetCount; i++) {
                Sheet sheet = wb.getSheetAt(i);
                // getLastRowNum() is an inclusive index, hence the + 1.
                int rows = sheet.getLastRowNum() - sheet.getFirstRowNum() + 1;

                JSONObject sheetRecord = new JSONObject();
                JSONUtilities.safePut(sheetRecord, "name", sheet.getSheetName());
                JSONUtilities.safePut(sheetRecord, "rows", rows);
                if (hasData) {
                    JSONUtilities.safePut(sheetRecord, "selected", false);
                } else if (rows > 1) {
                    JSONUtilities.safePut(sheetRecord, "selected", true);
                    hasData = true;
                }
                JSONUtilities.append(sheetRecords, sheetRecord);
            }
        } finally {
            is.close();
        }
    } catch (IOException ignored) {
        // Best effort: the UI simply gets an incomplete sheet list.
    } catch (ArrayIndexOutOfBoundsException ignored) {
        // Opening some corrupt workbooks fails this way (parseOneFile handles the
        // same case); treat it like an unreadable file here as well.
    }

    return options;
}
@Override
public void parseOneFile(
Project project,
ProjectMetadata metadata,
ImportingJob job,
String fileSource,
InputStream inputStream,
int limit,
JSONObject options,
List<Exception> exceptions
) {
boolean xmlBased = JSONUtilities.getBoolean(options, "xmlBased", false);
Workbook wb = null;
try {
wb = _xmlBased ?
wb = xmlBased ?
new XSSFWorkbook(inputStream) :
new HSSFWorkbook(new POIFSFileSystem(inputStream));
} catch (IOException e) {
throw new ImportException(
exceptions.add(new ImportException(
"Attempted to parse as an Excel file but failed. " +
"Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.",
e
);
));
return;
} catch (ArrayIndexOutOfBoundsException e){
throw new ImportException(
exceptions.add(new ImportException(
"Attempted to parse file as an Excel file but failed. " +
"This is probably caused by a corrupt excel file, or due to the file having previously been created or saved by a non-Microsoft application. " +
"Please try opening the file in Microsoft Excel and resaving it, then try re-uploading the file. " +
"See https://issues.apache.org/bugzilla/show_bug.cgi?id=48261 for further details",
e);
e
));
return;
}
Sheet sheet = wb.getSheetAt(0);
int[] sheets = JSONUtilities.getIntArray(options, "sheets");
for (int sheetIndex : sheets) {
final Sheet sheet = wb.getSheetAt(sheetIndex);
final int lastRow = sheet.getLastRowNum();
int firstRow = sheet.getFirstRowNum();
int lastRow = sheet.getLastRowNum();
List<String> columnNames = new ArrayList<String>();
Set<String> columnNameSet = new HashSet<String>();
Map<String, Integer> columnRootNameToIndex = new HashMap<String, Integer>();
int rowsWithData = 0;
// Row source handed to readTable(): yields one list of cells per call and
// null once the sheet is exhausted.
// NOTE(review): this span interleaves pre-change and post-change lines of the
// commit; every original line is kept as-is and only the end-of-sheet test in
// getNextRowOfCells() is corrected.
TableDataReader dataReader = new TableDataReader() {
int nextRow = 0;
Map<String, Recon> reconMap = new HashMap<String, Recon>();
for (int r = firstRow; r <= lastRow; r++) {
org.apache.poi.ss.usermodel.Row row = sheet.getRow(r);
if (row == null) {
continue;
} else if (ignoreLines > 0) {
ignoreLines--;
continue;
@Override
public List<Object> getNextRowOfCells() throws IOException {
// lastRow comes from sheet.getLastRowNum(), an inclusive 0-based index, so the
// row at index lastRow must still be returned; stop only once we are past it.
if (nextRow > lastRow) {
return null;
}
short firstCell = row.getFirstCellNum();
List<Object> cells = new ArrayList<Object>();
org.apache.poi.ss.usermodel.Row row = sheet.getRow(nextRow++);
if (row != null) {
short lastCell = row.getLastCellNum();
if (firstCell < 0 || firstCell > lastCell) {
continue;
// NOTE(review): POI documents getLastCellNum() as "last index PLUS ONE", so
// '<=' may append one trailing null cell per row -- confirm whether intended.
for (short cellIndex = 0; cellIndex <= lastCell; cellIndex++) {
Cell cell = null;
org.apache.poi.ss.usermodel.Cell sourceCell = row.getCell(cellIndex);
if (sourceCell != null) {
cell = extractCell(sourceCell, reconMap);
}
/*
* Still processing header lines
*/
if (headerLines > 0) {
headerLines--;
for (int c = firstCell; c <= lastCell; c++) {
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
if (cell != null) {
Serializable value = extractCell(cell);
String text = value != null ? value.toString() : null;
if (text != null && text.length() > 0) {
while (columnNames.size() < c + 1) {
columnNames.add(null);
cells.add(cell);
}
String existingName = columnNames.get(c);
String name = (existingName == null) ? text : (existingName + " " + text);
columnNames.set(c, name);
}
return cells;
}
};
readTable(
project,
metadata,
job,
dataReader,
fileSource + "#" + sheet.getSheetName(),
limit,
options,
exceptions
);
}
}
if (headerLines == 0) {
for (int i = 0; i < columnNames.size(); i++) {
String rootName = columnNames.get(i);
if (rootName == null) {
continue;
}
setUnduplicatedColumnName(rootName, columnNames, i, columnNameSet, columnRootNameToIndex);
}
}
/*
* Processing data rows
*/
} else {
Row newRow = new Row(columnNames.size());
boolean hasData = false;
for (int c = firstCell; c <= lastCell; c++) {
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
if (cell == null) {
continue;
}
Cell ourCell = extractCell(cell, reconMap);
if (ourCell != null) {
while (columnNames.size() < c + 1) {
columnNames.add(null);
}
if (columnNames.get(c) == null) {
setUnduplicatedColumnName("Column", columnNames, c, columnNameSet, columnRootNameToIndex);
}
newRow.setCell(c, ourCell);
hasData = true;
}
}
if (hasData) {
rowsWithData++;
if (skip <= 0 || rowsWithData > skip) {
project.rows.add(newRow);
project.columnModel.setMaxCellIndex(newRow.cells.size());
if (limit > 0 && project.rows.size() >= limit) {
break;
}
}
}
}
}
/*
* Create columns
*/
for (int c = 0; c < columnNames.size(); c++) {
String name = columnNames.get(c);
if (name != null) {
Column column = new Column(c, name);
project.columnModel.columns.add(column);
}
}
}
/**
 * Stores a column name at {@code index}, appending a numeric suffix when the
 * root name is already taken.
 *
 * A root name that is not yet in {@code columnNameSet} is used unchanged.
 * Otherwise "rootName N" is tried for increasing N -- starting at the value
 * remembered in {@code columnRootNameToIndex}, or 2 when none is remembered --
 * until an unused name is found; the next N to try is then remembered.
 *
 * @param rootName              desired column name
 * @param columnNames           list whose slot {@code index} receives the final name
 * @param index                 position in {@code columnNames} to overwrite
 * @param columnNameSet         names already in use; the final name is added to it
 * @param columnRootNameToIndex per-root-name memo of the next suffix to try
 */
protected void setUnduplicatedColumnName(
        String rootName, List<String> columnNames, int index, Set<String> columnNameSet, Map<String, Integer> columnRootNameToIndex) {
    // Fast path: the root name is still free.
    if (!columnNameSet.contains(rootName)) {
        columnNames.set(index, rootName);
        columnNameSet.add(rootName);
        return;
    }

    int suffix = columnRootNameToIndex.containsKey(rootName) ? columnRootNameToIndex.get(rootName) : 2;
    String candidate = rootName + " " + suffix;
    while (columnNameSet.contains(candidate)) {
        suffix++;
        candidate = rootName + " " + suffix;
    }

    columnNames.set(index, candidate);
    columnNameSet.add(candidate);
    columnRootNameToIndex.put(rootName, suffix + 1);
}
protected Serializable extractCell(org.apache.poi.ss.usermodel.Cell cell) {
static protected Serializable extractCell(org.apache.poi.ss.usermodel.Cell cell) {
int cellType = cell.getCellType();
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_FORMULA) {
cellType = cell.getCachedFormulaResultType();
@ -259,7 +226,7 @@ public class ExcelImporter implements StreamImporter {
return value;
}
protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
static protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
Serializable value = extractCell(cell);
if (value != null) {
@ -312,33 +279,4 @@ public class ExcelImporter implements StreamImporter {
return null;
}
}
/**
 * Decides whether this importer accepts the given upload and, as a side
 * effect, records in {@code _xmlBased} which workbook flavour to parse.
 *
 * The content type takes precedence: when it is non-null the file name is
 * never consulted, even if the content type is not recognised. Only when the
 * content type is null is the decision made from the file extension
 * (".xls" or ".xlsx").
 *
 * @param contentType declared MIME type, may be null
 * @param fileName    original file name, may be null
 * @return true if the data looks like an Excel workbook
 */
@Override
public boolean canImportData(String contentType, String fileName) {
    if (contentType != null) {
        String type = contentType.toLowerCase().trim();

        boolean binaryExcelType =
            "application/msexcel".equals(type) ||
            "application/x-msexcel".equals(type) ||
            "application/x-ms-excel".equals(type) ||
            "application/vnd.ms-excel".equals(type) ||
            "application/x-excel".equals(type) ||
            "application/xls".equals(type);
        if (binaryExcelType) {
            this._xmlBased = false;
            return true;
        }
        if ("application/x-xls".equals(type)) {
            this._xmlBased = true;
            return true;
        }
        return false;
    }

    if (fileName == null) {
        return false;
    }

    String lowered = fileName.toLowerCase();
    if (lowered.endsWith(".xls")) {
        this._xmlBased = false;
        return true;
    }
    if (lowered.endsWith(".xlsx")) {
        this._xmlBased = true;
        return true;
    }
    return false;
}
}

View File

@ -1,177 +1,105 @@
package com.google.refine.importers;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;
import javax.servlet.ServletException;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.json.JSONArray;
import org.json.JSONObject;
import com.google.refine.ProjectMetadata;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.model.Cell;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities;
public class FixedWidthImporter implements ReaderImporter, StreamImporter { //TODO this class is almost an exact copy of TsvCsvImporter. Could we combine the two, or combine common functions into a common abstract supertype?
final static Logger logger = LoggerFactory.getLogger("FixedWidthImporter");
@Override
public boolean canImportData(String contentType, String fileName) {
if (contentType != null) {
contentType = contentType.toLowerCase().trim();
//filter out tree structure data
if("application/json".equals(contentType)||
"text/json".equals(contentType)||
"application/xml".equals(contentType) ||
"text/xml".equals(contentType) ||
"application/rss+xml".equals(contentType) ||
"application/atom+xml".equals(contentType) ||
"application/rdf+xml".equals(contentType)) //TODO add more tree data types.
return false;
return
"text/plain".equals(contentType)
|| "text/fixed-width".equals(contentType); //FIXME Is text/fixed-width a valid contentType?
}
return false;
public class FixedWidthImporter extends TabularImportingParserBase {
/**
 * Creates the fixed-width importer.
 */
public FixedWidthImporter() {
    // NOTE(review): presumably 'false' selects Reader-based parsing in
    // TabularImportingParserBase -- confirm against the base class.
    super(false);
}
@Override
public void read(InputStream inputStream, Project project,
ProjectMetadata metadata, Properties options)
throws ImportException {
read(new InputStreamReader(inputStream), project, metadata, options);
/**
 * Builds the initial options shown by the fixed-width parser UI.
 *
 * Starts from the superclass options and then sets "lineSeparator" ("\n"),
 * "headerLines" (0), "guessCellValueTypes" (true) and "columnWidths". The
 * widths are guessed from the first file record when that record has a
 * "location"; otherwise, or when guessing yields nothing, the array is empty.
 *
 * @param job         importing job that owns the raw data directory
 * @param fileRecords records of the files being imported; only the first is inspected
 * @param format      format identifier chosen for these files
 * @return the options object for the parser UI
 */
public JSONObject createParserUIInitializationData(
        ImportingJob job, List<JSONObject> fileRecords, String format) {
    JSONObject uiOptions = super.createParserUIInitializationData(job, fileRecords, format);
    JSONArray widthsJson = new JSONArray();

    JSONObject firstRecord = fileRecords.get(0);
    String encoding = ImportingUtilities.getEncoding(firstRecord);
    String location = JSONUtilities.getString(firstRecord, "location", null);

    // Guess only when the record points at a stored file.
    int[] guessed = null;
    if (location != null) {
        guessed = guessColumnWidths(new File(job.getRawDataDir(), location), encoding);
    }
    if (guessed != null) {
        for (int i = 0; i < guessed.length; i++) {
            JSONUtilities.append(widthsJson, guessed[i]);
        }
    }

    JSONUtilities.safePut(uiOptions, "lineSeparator", "\n");
    JSONUtilities.safePut(uiOptions, "headerLines", 0);
    JSONUtilities.safePut(uiOptions, "columnWidths", widthsJson);
    JSONUtilities.safePut(uiOptions, "guessCellValueTypes", true);
    return uiOptions;
}
@Override
public void read(Reader reader, Project project, ProjectMetadata metadata,
Properties options) throws ImportException {
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
String columnWidths = options.getProperty("fixed-column-widths");
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true);
LineNumberReader lnReader = new LineNumberReader(reader);
read(lnReader, project, columnWidths,
limit, skip, ignoreLines, headerLines,
guessValueType, splitIntoColumns
);
public void parseOneFile(
Project project,
ProjectMetadata metadata,
ImportingJob job,
String fileSource,
Reader reader,
int limit,
JSONObject options,
List<Exception> exceptions
) {
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n");
final int[] columnWidths = JSONUtilities.getIntArray(options, "columnWidths");
final List<Object> columnNames;
if (options.has("columnNames")) {
columnNames = new ArrayList<Object>();
String[] strings = JSONUtilities.getStringArray(options, "columnNames");
for (String s : strings) {
columnNames.add(s);
}
/**
*
* @param lnReader
* LineNumberReader used to read file or string contents
* @param project
* The project into which the parsed data will be added
* @param columnWidths
* Expects a comma separated string of integers which indicate the number of characters in each line
* @param limit
* The maximum number of rows of data to import
* @param skip
* The number of initial data rows to skip
* @param ignoreLines
* The number of initial lines within the data source which should be ignored entirely
* @param headerLines
* The number of lines in the data source which describe each column
* @param guessValueType
* Whether the parser should try and guess the type of the value being parsed
* @param splitIntoColumns
* Whether the parser should try and split the data source into columns
* @throws IOException
*/
public void read(LineNumberReader lnReader, Project project,
String sep, int limit, int skip, int ignoreLines,
int headerLines, boolean guessValueType, boolean splitIntoColumns) throws ImportException{
int[] columnWidths = null;
columnWidths = getColumnWidthsFromString( sep );
if(columnWidths.length < 2)
splitIntoColumns = false;
List<String> columnNames = new ArrayList<String>();
String line = null;
int rowsWithData = 0;
try {
while ((line = lnReader.readLine()) != null) {
if (ignoreLines > 0) {
ignoreLines--;
continue;
} else if (StringUtils.isBlank(line)) {
continue;
}
if (headerLines > 0) {
//column headers
headerLines--;
ArrayList<String> cells = getCells(line, columnWidths, splitIntoColumns);
for (int c = 0; c < cells.size(); c++) {
String cell = cells.get(c).trim();
//add column even if cell is blank
ImporterUtilities.appendColumnName(columnNames, c, cell);
}
JSONUtilities.safePut(options, "headerLines", 1);
} else {
//data
Row row = new Row(columnNames.size());
columnNames = null;
}
ArrayList<String> cells = getCells(line, columnWidths, splitIntoColumns);
final LineNumberReader lnReader = new LineNumberReader(reader);
if( cells != null && cells.size() > 0 )
rowsWithData++;
TableDataReader dataReader = new TableDataReader() {
boolean usedColumnNames = false;
if (skip <=0 || rowsWithData > skip){
//add parsed data to row
for(String s : cells){
if (ExpressionUtils.isNonBlankData(s)) {
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(s) : s;
row.cells.add(new Cell(value, null));
@Override
public List<Object> getNextRowOfCells() throws IOException {
if (columnNames != null && !usedColumnNames) {
usedColumnNames = true;
return columnNames;
} else {
row.cells.add(null);
String line = lnReader.readLine();
if (line == null) {
return null;
} else {
return getCells(line, columnWidths);
}
}
project.rows.add(row);
project.columnModel.setMaxCellIndex(row.cells.size());
ImporterUtilities.ensureColumnsInRowExist(columnNames, row);
if (limit > 0 && project.rows.size() >= limit) {
break;
}
}
}
}
} catch (IOException e) {
throw new ImportException("The fixed width importer could not read the next line", e);
}
ImporterUtilities.setupColumns(project, columnNames);
}
};
readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
}
/**
@ -181,9 +109,9 @@ public class FixedWidthImporter implements ReaderImporter, StreamImporter { //TO
* @param splitIntoColumns
* @return
*/
private ArrayList<String> getCells(String line, int[] widths, boolean splitIntoColumns) {
ArrayList<String> cells = new ArrayList<String>();
if(splitIntoColumns){
static private ArrayList<Object> getCells(String line, int[] widths) {
ArrayList<Object> cells = new ArrayList<Object>();
int columnStartCursor = 0;
int columnEndCursor = 0;
for (int width : widths) {
@ -194,8 +122,9 @@ public class FixedWidthImporter implements ReaderImporter, StreamImporter { //TO
columnEndCursor = columnStartCursor + width;
if(columnEndCursor > line.length())
if (columnEndCursor > line.length()) {
columnEndCursor = line.length();
}
if (columnEndCursor <= columnStartCursor) {
cells.add(null); //FIXME is adding a null cell (to represent no data, or a zero width column) OK?
continue;
@ -205,36 +134,83 @@ public class FixedWidthImporter implements ReaderImporter, StreamImporter { //TO
columnStartCursor = columnEndCursor;
}
}else{
cells.add(line);
// Residual text
if (columnStartCursor < line.length()) {
cells.add(line.substring(columnStartCursor));
}
return cells;
}
/**
* Converts the expected string of comma separated integers into an array of integers.
* Also performs a basic sanity check on the provided data.
*
* @param sep
* A comma separated string of integers. e.g. 4,2,5,22,19
* @return
* @throws ServletException
*/
public int[] getColumnWidthsFromString(String sep) throws ImportException {
String[] splitSep = Pattern.compile(",").split(sep);
int[] widths = new int[splitSep.length];
for(int i = 0; i < splitSep.length; i++){
static public int[] guessColumnWidths(File file, String encoding) {
try {
int parsedInt = Integer.parseInt(splitSep[i]);
if( parsedInt < 0 )
throw new ImportException("A column cannot have a width of less than zero", null);
widths[i] = parsedInt;
}catch(NumberFormatException e){
throw new ImportException("For a fixed column width import, the column widths must be given as a comma separated string of integers. e.g. 1,3,5,22,19", e);
InputStream is = new FileInputStream(file);
try {
Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
LineNumberReader lineNumberReader = new LineNumberReader(reader);
int[] counts = null;
int totalBytes = 0;
int lineCount = 0;
String s;
while (totalBytes < 64 * 1024 &&
lineCount < 100 &&
(s = lineNumberReader.readLine()) != null) {
totalBytes += s.length() + 1; // count the new line character
if (s.length() == 0) {
continue;
}
lineCount++;
if (counts == null) {
counts = new int[s.length()];
for (int c = 0; c < counts.length; c++) {
counts[c] = 0;
}
return widths;
}
for (int c = 0; c < counts.length && c < s.length(); c++) {
char ch = s.charAt(c);
if (ch == ' ') {
counts[c]++;
}
}
}
if (counts != null) {
List<Integer> widths = new ArrayList<Integer>();
int startIndex = 0;
for (int c = 0; c < counts.length; c++) {
int count = counts[c];
if (count == lineCount && c > startIndex) {
widths.add(c - startIndex + 1);
startIndex = c + 1;
}
}
for (int i = widths.size() - 1; i > 0; i--) {
if (widths.get(i) == 1) {
widths.remove(i);
widths.set(i - 1, widths.get(i - 1) + 1);
}
}
int[] widthA = new int[widths.size()];
for (int i = 0; i < widthA.length; i++) {
widthA[i] = widths.get(i);
}
return widthA;
}
} finally {
is.close();
}
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
}

View File

@ -38,11 +38,9 @@ package com.google.refine.importers;
* indicating the underlying cause of the problem.
*/
public class ImportException extends Exception {

    private static final long serialVersionUID = 7077314805989174181L;

    /**
     * Creates an import failure that wraps its underlying cause.
     *
     * @param message description of what went wrong
     * @param cause   the exception that triggered the failure, may be null
     */
    public ImportException(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@ -1,138 +0,0 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static registry mapping importer names to importer instances.
 *
 * The built-in importers listed in {@code importerNames} are registered when
 * the class is loaded; further importers can be added with
 * {@link #registerImporter(String, Importer)}.
 */
abstract public class ImporterRegistry {

    final static Logger logger = LoggerFactory.getLogger("importer-registry");

    static final private Map<String, Importer> importers = new HashMap<String, Importer>();

    /** Built-in importers as { registry name, implementation class name } pairs. */
    private static final String[][] importerNames = {
        {"ExcelImporter", "com.google.refine.importers.ExcelImporter"},
        {"XmlImporter", "com.google.refine.importers.XmlImporter"},
        {"RdfTripleImporter", "com.google.refine.importers.RdfTripleImporter"},
        {"MarcImporter", "com.google.refine.importers.MarcImporter"},
        {"TsvCsvImporter", "com.google.refine.importers.TsvCsvImporter"},
        {"JsonImporter", "com.google.refine.importers.JsonImporter"},
        {"FixedWidthImporter", "com.google.refine.importers.FixedWidthImporter"}
    };

    static {
        registerImporters(importerNames);
    }

    /**
     * Instantiates and registers every importer in the given table.
     *
     * @param importers rows of { registry name, fully qualified class name }
     * @return true only if every row was instantiated and registered; false if
     *         any class failed to load or any name was already taken
     */
    static public boolean registerImporters(String[][] importers) {
        boolean status = true;
        // Iterate the argument, not the built-in table: the parameter shadows the
        // registry map of the same name, and was previously ignored altogether.
        for (String[] importer : importers) {
            String importerName = importer[0];
            String className = importer[1];
            logger.debug("Loading command {} class: {}", importerName, className);

            Importer cmd = instantiateImporter(className);
            if (cmd == null) {
                status = false;
                continue;
            }
            // '&=' so that one failure is remembered; '|=' let a later success
            // flip the result back to true.
            status &= registerImporter(importerName, cmd);
        }
        return status;
    }

    /** Creates an importer through its no-argument constructor; logs and returns null on failure. */
    private static Importer instantiateImporter(String className) {
        try {
            // TODO: May need to use the servlet container's class loader here
            return (Importer) Class.forName(className).newInstance();
        } catch (InstantiationException e) {
            logger.error("Failed to load importer class " + className, e);
        } catch (IllegalAccessException e) {
            logger.error("Failed to load importer class " + className, e);
        } catch (ClassNotFoundException e) {
            logger.error("Failed to load importer class " + className, e);
        }
        return null;
    }

    /**
     * Register a single importer.
     *
     * @param name importer verb for importer
     * @param importerObject object implementing the importer
     *
     * @return true if importer was loaded and registered successfully;
     *         false if the name is already registered (the existing entry is kept)
     */
    static public boolean registerImporter(String name, Importer importerObject) {
        if (importers.containsKey(name)) {
            return false;
        }
        importers.put(name, importerObject);
        return true;
    }

    // Currently only for test purposes
    static protected boolean unregisterImporter(String verb) {
        return importers.remove(verb) != null;
    }

    /**
     * Finds a registered importer that accepts the given content type / file name.
     *
     * Importers are probed in the iteration order of the backing HashMap, which
     * is unspecified, so when several importers accept the same input the one
     * returned is not guaranteed to be stable.
     *
     * @param contentType    declared MIME type, may be null
     * @param fileName       original file name, may be null
     * @param provideDefault when true, fall back to a new TsvCsvImporter if nothing matches
     * @return the matching importer, the default, or null when there is no match
     *         and no default was requested
     */
    static public Importer guessImporter(String contentType, String fileName, boolean provideDefault) {
        for (Importer candidate : importers.values()) {
            if (candidate.canImportData(contentType, fileName)) {
                return candidate;
            }
        }
        if (provideDefault) {
            return new TsvCsvImporter(); // default
        } else {
            return null;
        }
    }

    /** Same as {@link #guessImporter(String, String, boolean)} with the default importer enabled. */
    static public Importer guessImporter(String contentType, String filename) {
        return guessImporter(contentType, filename, true);
    }

    /**
     * Finds a registered {@code UrlImporter} that accepts the given URL.
     *
     * @param url location of the data to import
     * @return the first accepting importer, or null if none accepts it
     */
    static public Importer guessUrlImporter(URL url) {
        for (Importer importer : importers.values()) {
            if (importer instanceof UrlImporter
                    && ((UrlImporter) importer).canImportData(url)) {
                return importer;
            }
        }
        return null;
    }
}

View File

@ -33,15 +33,25 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.json.JSONObject;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.TrackingInputStream;
public class ImporterUtilities {
@ -118,6 +128,33 @@ public class ImporterUtilities {
}
}
/**
 * Returns the project column backing position {@code index} of the current
 * file, creating a new one when the file has just grown by one column.
 *
 * For an index below the number of known names, the column is looked up by
 * name. For an index equal to that number, a new column named "Column N"
 * (smallest N >= 1 not already used in the project) is appended to the project
 * and its name is added to {@code currentFileColumnNames}.
 *
 * @param project                project whose column model is queried and extended
 * @param currentFileColumnNames column names seen so far for the file being read
 * @param index                  zero-based column position within the file
 * @return the existing or newly created column
 * @throws RuntimeException if {@code index} skips past the next free position
 */
static public Column getOrAllocateColumn(Project project, List<String> currentFileColumnNames, int index) {
    int knownCount = currentFileColumnNames.size();
    if (index > knownCount) {
        throw new RuntimeException("Unexpected code path");
    }
    if (index < knownCount) {
        return project.columnModel.getColumnByName(currentFileColumnNames.get(index));
    }

    // index == knownCount: find the first "Column N" the project does not use yet.
    int suffix = 1;
    String candidate = "Column " + suffix;
    while (project.columnModel.getColumnByName(candidate) != null) {
        suffix++;
        candidate = "Column " + suffix;
    }

    Column column = new Column(project.columnModel.allocateNewCellIndex(), candidate);
    try {
        project.columnModel.addColumn(project.columnModel.columns.size(), column, false);
    } catch (ModelException e) {
        // Ignore: shouldn't get in here since we just checked for duplicate names.
    }
    currentFileColumnNames.add(candidate);
    return column;
}
static public void setupColumns(Project project, List<String> columnNames) {
Map<String, Integer> nameToIndex = new HashMap<String, Integer>();
for (int c = 0; c < columnNames.size(); c++) {
@ -125,7 +162,8 @@ public class ImporterUtilities {
if (cell.isEmpty()) {
cell = "Column";
} else if (cell.startsWith("\"") && cell.endsWith("\"")) {
cell = cell.substring(1, cell.length() - 1).trim(); //FIXME is trimming quotation marks appropriate?
// FIXME: is trimming quotation marks appropriate?
cell = cell.substring(1, cell.length() - 1).trim();
}
if (nameToIndex.containsKey(cell)) {
@ -137,10 +175,74 @@ public class ImporterUtilities {
nameToIndex.put(cell, 2);
}
Column column = new Column(c, cell);
project.columnModel.columns.add(column);
columnNames.set(c, cell);
if (project.columnModel.getColumnByName(cell) == null) {
Column column = new Column(project.columnModel.allocateNewCellIndex(), cell);
try {
project.columnModel.addColumn(project.columnModel.columns.size(), column, false);
} catch (ModelException e) {
// Ignore: shouldn't get in here since we just checked for duplicate names.
}
}
}
}
/**
 * Callback used to report progress while several files are read in turn.
 */
static public interface MultiFileReadingProgress {
    /** Called when reading of {@code fileSource} begins. */
    void startFile(String fileSource);

    /** Called while {@code fileSource} is being read, with the bytes read from it so far. */
    void readingFile(String fileSource, long bytesRead);

    /** Called when {@code fileSource} is finished, with the bytes read from it. */
    void endFile(String fileSource, long bytesRead);
}
/**
 * Creates a progress reporter that turns per-file byte counts into an overall
 * percentage for the given importing job.
 *
 * The total is the sum of the on-disk lengths of all files in
 * {@code fileRecords}. Each report publishes "Reading <fileSource>" together
 * with the share of that total consumed so far (bytes of finished files plus
 * bytes of the file in progress).
 *
 * @param job         job whose "creating project" progress is updated
 * @param fileRecords records of the files that are going to be read
 * @return a progress callback bound to {@code job}
 */
static public MultiFileReadingProgress createMultiFileReadingProgress(
        final ImportingJob job, List<JSONObject> fileRecords) {
    long totalSize = 0;
    for (JSONObject fileRecord : fileRecords) {
        File file = ImportingUtilities.getFile(job, fileRecord);
        totalSize += file.length();
    }

    final long totalSize2 = totalSize;
    return new MultiFileReadingProgress() {
        // Bytes of all files that have been completely read.
        long totalBytesRead = 0;

        void setProgress(String fileSource, long bytesRead) {
            // With no files or only empty files the total is 0; dividing by it
            // would throw ArithmeticException, so report "nothing left to read".
            int percent = totalSize2 > 0
                ? (int) (100 * (totalBytesRead + bytesRead) / totalSize2)
                : 100;
            ImportingUtilities.setCreatingProjectProgress(
                job,
                "Reading " + fileSource,
                percent);
        }

        @Override
        public void startFile(String fileSource) {
            setProgress(fileSource, 0);
        }

        @Override
        public void readingFile(String fileSource, long bytesRead) {
            setProgress(fileSource, bytesRead);
        }

        @Override
        public void endFile(String fileSource, long bytesRead) {
            totalBytesRead += bytesRead;
        }
    };
}
/**
 * Opens a file for reading and, when a progress callback is supplied, wraps
 * the stream so that the callback is notified as bytes are consumed.
 *
 * @param fileSource label passed back to the callback to identify the file
 * @param file       file to open
 * @param progress   callback to notify, or null for an untracked stream
 * @return the plain file stream when {@code progress} is null, otherwise a
 *         tracking wrapper around it
 * @throws FileNotFoundException if the file cannot be opened
 */
static public InputStream openAndTrackFile(
        final String fileSource,
        final File file,
        final MultiFileReadingProgress progress) throws FileNotFoundException {
    final InputStream rawStream = new FileInputStream(file);
    if (progress == null) {
        return rawStream;
    }

    return new TrackingInputStream(rawStream) {
        @Override
        protected long track(long bytesRead) {
            long tracked = super.track(bytesRead);
            // Report the running total kept by the wrapper, not this call's increment.
            progress.readingFile(fileSource, this.bytesRead);
            return tracked;
        }
    };
}
}

View File

@ -0,0 +1,138 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.List;
import org.apache.commons.lang.NotImplementedException;
import org.json.JSONObject;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingParser;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
/**
 * Base class for importing parsers that process the files of an importing
 * job one after another.
 *
 * Subclasses override exactly one of the two low-level {@code parseOneFile}
 * variants: the {@link InputStream} one when constructed with
 * {@code useInputStream == true}, the {@link Reader} one otherwise. The
 * variant that is not overridden throws {@link NotImplementedException}.
 */
abstract public class ImportingParserBase implements ImportingParser {
    /** Whether files are handed to the subclass as a raw stream rather than a reader. */
    final protected boolean useInputStream;

    /**
     * @param useInputStream true to feed files as {@link InputStream},
     *                       false to feed them as {@link Reader}
     */
    protected ImportingParserBase(boolean useInputStream) {
        this.useInputStream = useInputStream;
    }

    /**
     * Parses every file record in turn, stopping early when the job is
     * canceled or when a positive {@code limit} on the number of project rows
     * has been reached. I/O failures of individual files are collected in
     * {@code exceptions} and do not stop the remaining files.
     */
    @Override
    public void parse(Project project, ProjectMetadata metadata,
            final ImportingJob job, List<JSONObject> fileRecords, String format,
            int limit, JSONObject options, List<Exception> exceptions) {
        MultiFileReadingProgress progress =
                ImporterUtilities.createMultiFileReadingProgress(job, fileRecords);

        for (JSONObject fileRecord : fileRecords) {
            if (job.canceled) {
                break;
            }

            try {
                parseOneFile(project, metadata, job, fileRecord, limit, options, exceptions, progress);
            } catch (IOException e) {
                exceptions.add(e);
            }

            if (limit > 0 && project.rows.size() >= limit) {
                break;
            }
        }
    }

    /**
     * Opens the file behind {@code fileRecord} and dispatches it to the
     * stream- or reader-based {@code parseOneFile} variant.
     *
     * @param progress callback notified about start, bytes read and end of
     *                 the file; may be {@code null}, in which case no
     *                 progress is reported
     * @throws IOException if the file cannot be opened, decoded or closed
     */
    public void parseOneFile(
        Project project,
        ProjectMetadata metadata,
        ImportingJob job,
        JSONObject fileRecord,
        int limit,
        JSONObject options,
        List<Exception> exceptions,
        final MultiFileReadingProgress progress
    ) throws IOException {
        final File file = ImportingUtilities.getFile(job, fileRecord);
        final String fileSource = ImportingUtilities.getFileSource(fileRecord);

        // openAndTrackFile accepts a null progress, so this method must not
        // dereference it unconditionally either.
        if (progress != null) {
            progress.startFile(fileSource);
        }
        try {
            InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
            try {
                if (useInputStream) {
                    parseOneFile(project, metadata, job, fileSource, inputStream, limit, options, exceptions);
                } else {
                    Reader reader = ImportingUtilities.getReaderFromStream(inputStream, fileRecord);
                    parseOneFile(project, metadata, job, fileSource, reader, limit, options, exceptions);
                }
            } finally {
                inputStream.close();
            }
        } finally {
            // Credit the whole file even if parsing stopped early or failed.
            if (progress != null) {
                progress.endFile(fileSource, file.length());
            }
        }
    }

    /**
     * Reader-based parsing hook; must be overridden by subclasses constructed
     * with {@code useInputStream == false}.
     */
    public void parseOneFile(
        Project project,
        ProjectMetadata metadata,
        ImportingJob job,
        String fileSource,
        Reader reader,
        int limit,
        JSONObject options,
        List<Exception> exceptions
    ) {
        throw new NotImplementedException();
    }

    /**
     * Stream-based parsing hook; must be overridden by subclasses constructed
     * with {@code useInputStream == true}.
     */
    public void parseOneFile(
        Project project,
        ProjectMetadata metadata,
        ImportingJob job,
        String fileSource,
        InputStream inputStream,
        int limit,
        JSONObject options,
        List<Exception> exceptions
    ) {
        throw new NotImplementedException();
    }
}

View File

@ -33,95 +33,328 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.Properties;
import java.io.Reader;
import java.util.List;
import javax.servlet.ServletException;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken;
import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
import com.google.refine.importers.parsers.JSONParser;
import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.TreeImportingParserBase;
import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
public class JsonImporter implements StreamImporter{
final static Logger logger = LoggerFactory.getLogger("JsonImporter");
public class JsonImporter extends TreeImportingParserBase {
/**
 * Creates the JSON importer.
 * NOTE(review): the false argument is presumably the base class's
 * useInputStream flag, i.e. files arrive as a Reader (this class overrides
 * the Reader-based parseOneFile) -- confirm against TreeImportingParserBase.
 */
public JsonImporter() {
super(false);
}
public static final int BUFFER_SIZE = 64 * 1024;
/**
 * Mutable counter shared by the preview parsing methods so that they can
 * stop once enough tokens have been consumed.
 */
private static class PreviewParsingState {
    /** Number of JSON tokens pulled from the parser so far. */
    int tokenCount = 0;
}
final static private int PREVIEW_PARSING_LIMIT = 1000;
@Override
public void read(InputStream inputStream, Project project,
ProjectMetadata metadata, Properties options)
throws ImportException {
//FIXME the below is a close duplicate of the XmlImporter code.
//Should wrap a lot of the below into methods and put them in a common superclass
logger.trace("JsonImporter.read");
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
String[] recordPath = null;
{
byte[] buffer = new byte[BUFFER_SIZE];
int bytes_read = 0;
try {//fill the buffer with data
while (bytes_read < BUFFER_SIZE) {
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
if (c == -1) break;
bytes_read +=c ;
}
pis.unread(buffer, 0, bytes_read);
} catch (IOException e) {
throw new ImportException("Read error",e);
}
InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read);
TreeParser parser = new JSONParser(iStream);
if (options.containsKey("importer-record-tag")) {
/**
 * Builds the initial parser options for the UI and, when possible, attaches
 * a preview of the first file's JSON content under the "dom" key.
 *
 * The preview is produced by parseForPreview on the first file record only.
 * An IOException while opening or closing that file is ignored; the options
 * are then returned without the "dom" entry.
 */
public JSONObject createParserUIInitializationData(
ImportingJob job, List<JSONObject> fileRecords, String format) {
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
try {
// NOTE(review): the recordPath assignment below refers to names that are
// not declared in this method -- it looks like a leftover from the former
// read() implementation; confirm and remove.
recordPath = XmlImportUtilities.detectPathFromTag(
parser,
options.getProperty("importer-record-tag"));
// Only the first file is sampled for the preview.
JSONObject firstFileRecord = fileRecords.get(0);
File file = ImportingUtilities.getFile(job, firstFileRecord);
InputStream is = new FileInputStream(file);
try {
JsonFactory factory = new JsonFactory();
JsonParser parser = factory.createJsonParser(is);
PreviewParsingState state = new PreviewParsingState();
Object rootValue = parseForPreview(parser, state);
if (rootValue != null) {
JSONUtilities.safePut(options, "dom", rootValue);
}
} finally {
is.close();
}
} catch (IOException e) {
// Best effort: without a readable file the UI simply gets no preview.
}
return options;
}
/**
 * Converts the value that starts at {@code token} into a preview object:
 * a JSONArray or JSONObject for containers, a String, Number or Boolean for
 * scalars.
 *
 * @param parser parser positioned on {@code token}
 * @param state  shared token counter, passed on to nested containers
 * @param token  the token just read; may be {@code null}
 * @return the preview value, or {@code null} for a JSON null, a missing
 *         token or a token that does not start a value
 * @throws JsonParseException if the current scalar cannot be decoded
 * @throws IOException        if reading from the underlying source fails
 */
final static private Object parseForPreview(JsonParser parser, PreviewParsingState state, JsonToken token)
        throws JsonParseException, IOException {
    if (token == null) {
        return null;
    }

    switch (token) {
        case START_ARRAY:
            return parseArrayForPreview(parser, state);
        case START_OBJECT:
            return parseObjectForPreview(parser, state);
        case VALUE_STRING:
            return parser.getText();
        case VALUE_NUMBER_INT:
            // getIntValue() fails for integers outside the int range, which
            // would cut the preview short; getNumberValue() widens to Long or
            // BigInteger as needed and still yields Integer for small values.
            return parser.getNumberValue();
        case VALUE_NUMBER_FLOAT:
            // Use double precision so the previewed number matches the source.
            return Double.valueOf(parser.getDoubleValue());
        case VALUE_TRUE:
            return Boolean.TRUE;
        case VALUE_FALSE:
            return Boolean.FALSE;
        case VALUE_NULL:
        default:
            return null;
    }
}
/**
 * Reads the next token from the parser, counts it, and converts the value it
 * starts into a preview object. Any failure yields {@code null}.
 */
final static private Object parseForPreview(JsonParser parser, PreviewParsingState state) {
    Object value;
    try {
        final JsonToken upcoming = parser.nextToken();
        state.tokenCount += 1;
        value = parseForPreview(parser, state, upcoming);
    } catch (Exception e) {
        // Preview is best effort: a broken document simply has no value here.
        value = null;
    }
    return value;
}
/**
 * Collects the fields of the JSON object the parser has just entered into a
 * JSONObject, until the object ends, the input ends, an unexpected token or
 * error occurs, or the preview token budget is used up. Whatever was
 * gathered up to that point is returned.
 */
final static private JSONObject parseObjectForPreview(JsonParser parser, PreviewParsingState state) {
    final JSONObject fields = new JSONObject();

    while (state.tokenCount < PREVIEW_PARSING_LIMIT) {
        try {
            final JsonToken token = parser.nextToken();
            if (token == null) {
                return fields;
            }
            state.tokenCount++;

            // Inside an object only field names continue the loop; END_OBJECT
            // and anything unexpected both finish it.
            if (token != JsonToken.FIELD_NAME) {
                return fields;
            }

            final String name = parser.getText();
            final Object value = parseForPreview(parser, state);
            JSONUtilities.safePut(fields, name, value);
        } catch (Exception e) {
            return fields;
        }
    }
    return fields;
}
/**
 * Collects the elements of the JSON array the parser has just entered into a
 * JSONArray, until the array ends, the input ends, an error occurs, or the
 * preview token budget is used up. Whatever was gathered up to that point is
 * returned.
 */
final static private JSONArray parseArrayForPreview(JsonParser parser, PreviewParsingState state) {
    final JSONArray elements = new JSONArray();

    while (state.tokenCount < PREVIEW_PARSING_LIMIT) {
        try {
            final JsonToken token = parser.nextToken();
            if (token == null) {
                return elements;
            }
            state.tokenCount++;

            if (token == JsonToken.END_ARRAY) {
                return elements;
            }

            // The token just read is the start of the next element.
            final Object element = parseForPreview(parser, state, token);
            JSONUtilities.append(elements, element);
        } catch (Exception e) {
            return elements;
        }
    }
    return elements;
}
/**
 * Wraps the character stream in a JSONTreeReader and hands it to the
 * tree-reader based parseOneFile overload.
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata,
        ImportingJob job, String fileSource, Reader reader,
        ImportColumnGroup rootColumnGroup, int limit, JSONObject options, List<Exception> exceptions) {
    final JSONTreeReader treeReader = new JSONTreeReader(reader);
    parseOneFile(
            project, metadata, job, fileSource,
            treeReader, rootColumnGroup, limit, options, exceptions);
}
static public class JSONTreeReader implements TreeReader {
final static Logger logger = LoggerFactory.getLogger("JsonParser");
JsonFactory factory = new JsonFactory();
JsonParser parser = null;
//The following is a workaround for inconsistent Jackson JsonParser
Boolean lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
Boolean thisTokenIsAFieldName = false;
String lastFieldName = null;
//end of workaround
/**
 * Creates a tree reader over the given character stream.
 *
 * If the Jackson parser cannot be created the failure is logged and the
 * reader is left without a parser, so later calls on it will fail.
 *
 * @param reader source of the JSON text
 */
public JSONTreeReader(Reader reader) {
    try {
        parser = factory.createJsonParser(reader);
    } catch (Exception e) {
        // Report through the class logger rather than dumping to stderr.
        logger.error("Failed to create JSON parser", e);
    }
}
/**
 * Always reports zero attributes: this reader treats all JSON content as
 * elements.
 */
@Override
public int getAttributeCount() {
return 0;
}
/**
 * Always returns null: this reader treats all JSON content as elements, so
 * there is no attribute at any index.
 */
@Override
public String getAttributeLocalName(int index) {
return null;
}
/**
 * Always returns null: this reader treats all JSON content as elements, so
 * there is no attribute (and no prefix) at any index.
 */
@Override
public String getAttributePrefix(int index) {
return null;
}
/**
 * Always returns null: this reader treats all JSON content as elements, so
 * there is no attribute value at any index.
 */
@Override
public String getAttributeValue(int index) {
return null;
}
/**
 * Returns the tree token corresponding to the JSON token the parser is
 * currently positioned on, without advancing.
 */
@Override
public Token current() throws ServletException {
    final JsonToken currentToken = parser.getCurrentToken();
    return mapToToken(currentToken);
}
/**
 * Returns the name associated with the current token.
 *
 * When the parser reports no name, the name remembered from the preceding
 * FIELD_NAME token is used if the current token opened the entity belonging
 * to that field; otherwise the placeholder "__anonymous__" is returned.
 *
 * @throws ServletException wrapping any failure while querying the parser
 */
@Override
public String getFieldName() throws ServletException {
    try {
        final String currentName = parser.getCurrentName();
        if (currentName != null) {
            return currentName;
        }

        // Workaround for the Jackson parser not always reporting a name.
        return this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity
                ? this.lastFieldName
                : "__anonymous__";
    } catch (Exception e) {
        throw new ServletException(e);
    }
}
/**
 * Always returns null: JSON has no notion of prefixes.
 */
@Override
public String getPrefix() {
return null;
}
/**
 * Returns the text of the current token as reported by the parser.
 *
 * @throws ServletException wrapping any failure while reading the text
 */
@Override
public String getFieldValue() throws ServletException {
    final String text;
    try {
        text = parser.getText();
    } catch (Exception e) {
        throw new ServletException(e);
    }
    return text;
}
/**
 * Always claims that another token is available. The end of input is only
 * detected by next(), which throws a ServletException when the parser has
 * no more tokens.
 */
@Override
public boolean hasNext() throws ServletException {
return true; // FIXME: always true; find a way to detect the end of input without consuming a token (peek, or advance then rewind?)
}
/**
 * Advances the parser by one JSON token and returns the matching tree token.
 *
 * While doing so it maintains the field-name bookkeeping that getFieldName()
 * relies on: the name of the most recent FIELD_NAME token is remembered, and
 * a flag records whether the current token is an array/object start that
 * directly followed such a field name.
 *
 * @throws ServletException if the parser fails or has no more tokens
 */
@Override
public Token next() throws ServletException {
JsonToken next;
try {
next = parser.nextToken();
} catch (JsonParseException e) {
throw new ServletException(e);
} catch (IOException e) {
throw new ServletException(e);
}
// A null token means the parser has run out of input.
if(next == null)
throw new ServletException("No more Json Tokens in stream");
//The following is a workaround for inconsistent Jackson JsonParser
if(next == JsonToken.FIELD_NAME){
try {
// Remember the name so the entity that follows can be labelled with it.
this.thisTokenIsAFieldName = true;
this.lastFieldName = parser.getCurrentName();
} catch (Exception e) {
// Deliberately ignored: the remembered name is just left as it was.
}
}else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){
if(this.thisTokenIsAFieldName){
// Entity start right after a field name: it belongs to that field.
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = true;
this.thisTokenIsAFieldName = false;
}else{
// Entity start without a preceding field name: nothing to inherit.
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
this.lastFieldName = null;
}
}else{
// NOTE(review): recordPath is not declared in this reader -- the next line
// looks like a leftover from the former read() implementation; confirm.
recordPath = XmlImportUtilities.detectRecordElement(parser);
// Any other token clears the bookkeeping.
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
this.lastFieldName = null;
this.thisTokenIsAFieldName = false;
}
//end of workaround
return mapToToken(next);
}
if (recordPath == null)
return;
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
XmlImportUtilities.importTreeData(new JSONParser(pis), project, recordPath, rootColumnGroup);
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
project.columnModel.update();
}
@Override
public boolean canImportData(String contentType, String fileName) {
if (contentType != null) {
contentType = contentType.toLowerCase().trim();
if("application/json".equals(contentType) ||
"text/json".equals(contentType)) {
return true;
}
} else if (fileName != null) {
fileName = fileName.toLowerCase();
if (
fileName.endsWith(".json") ||
fileName.endsWith(".js")
) {
return true;
/**
 * Maps a Jackson token onto the tree reader's token vocabulary.
 *
 * Array and object starts/ends become entity starts/ends, every scalar
 * (including JSON null) becomes a value, and everything else is ignorable.
 *
 * @param token Jackson token; may be {@code null}, which is what the parser
 *              reports from current() before the first call to next()
 * @return the corresponding tree token, never {@code null}
 */
protected Token mapToToken(JsonToken token) {
    // Switching on null would throw a NullPointerException.
    // NOTE(review): Ignorable seems the natural answer for "no token yet" --
    // confirm that callers of current() agree.
    if (token == null) {
        return Token.Ignorable;
    }

    switch (token) {
        case START_ARRAY:
        case START_OBJECT:
            return Token.StartEntity;

        case END_ARRAY:
        case END_OBJECT:
            return Token.EndEntity;

        case VALUE_STRING:
        case VALUE_NUMBER_INT:
        case VALUE_NUMBER_FLOAT:
        case VALUE_TRUE:
        case VALUE_FALSE:
        case VALUE_NULL:
            return Token.Value;

        // FIELD_NAME is not a token of its own here: the name is exposed
        // through getFieldName(). JSON has no start/end-of-document tokens;
        // VALUE_EMBEDDED_OBJECT and NOT_AVAILABLE are ignored as well.
        case FIELD_NAME:
        case VALUE_EMBEDDED_OBJECT:
        case NOT_AVAILABLE:
        default:
            return Token.Ignorable;
    }
}
return false;
}
}

View File

@ -0,0 +1,21 @@
package com.google.refine.importers;
import java.io.File;
import com.google.refine.importing.FormatGuesser;
/**
 * Refines a line-based text format: a file is classified as separator-based
 * when a column separator can be guessed, otherwise as fixed-width when
 * column widths can be guessed, otherwise left unclassified.
 */
public class LineBasedFormatGuesser implements FormatGuesser {

    /**
     * @return "text/line-based/*sv", "text/line-based/fixed-width", or
     *         {@code null} when neither guess succeeds
     */
    @Override
    public String guess(File file, String encoding, String seedFormat) {
        // Separator detection takes precedence over fixed-width detection.
        if (SeparatorBasedImporter.guessSeparator(file, encoding) != null) {
            return "text/line-based/*sv";
        }
        if (FixedWidthImporter.guessColumnWidths(file, encoding) != null) {
            return "text/line-based/fixed-width";
        }
        return null;
    }
}

View File

@ -0,0 +1,105 @@
package com.google.refine.importers;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import org.json.JSONObject;
import com.google.refine.ProjectMetadata;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
/**
 * Importer for plain line-based text: each row consists of one cell per
 * source line, with a configurable number of consecutive lines per row.
 */
public class LineBasedImporter extends TabularImportingParserBase {
    public LineBasedImporter() {
        super(false);
    }

    /**
     * Extends the base options with the defaults of this importer: newline
     * as line separator, one line per row, no header lines, and cell value
     * type guessing switched on.
     */
    @Override
    public JSONObject createParserUIInitializationData(
            ImportingJob job, List<JSONObject> fileRecords, String format) {
        JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);

        JSONUtilities.safePut(options, "lineSeparator", "\n");
        JSONUtilities.safePut(options, "linesPerRow", 1);
        JSONUtilities.safePut(options, "headerLines", 0);
        JSONUtilities.safePut(options, "guessCellValueTypes", true);

        return options;
    }

    /**
     * Reads the text line by line and feeds it to {@code readTable}.
     *
     * Options consulted: "linesPerRow" (default 1), "columnNames" (optional;
     * when present the names are served as a single header row) and
     * "ignoreLines" (number of leading lines to skip). Note that this method
     * rewrites "headerLines" and "ignoreLines" in {@code options} before
     * delegating, because it has already dealt with both itself.
     */
    @Override
    public void parseOneFile(
        Project project,
        ProjectMetadata metadata,
        ImportingJob job,
        String fileSource,
        Reader reader,
        int limit,
        JSONObject options,
        List<Exception> exceptions
    ) {
        // A non-positive value would make every read look like end-of-input
        // and silently import nothing, so fall back to one line per row.
        final int linesPerRow = Math.max(1, JSONUtilities.getInt(options, "linesPerRow", 1));

        final List<Object> columnNames;
        if (options.has("columnNames")) {
            columnNames = new ArrayList<Object>();
            for (String name : JSONUtilities.getStringArray(options, "columnNames")) {
                columnNames.add(name);
            }
            // The supplied names are served below as the one and only header row.
            JSONUtilities.safePut(options, "headerLines", 1);
        } else {
            columnNames = null;
            JSONUtilities.safePut(options, "headerLines", 0);
        }

        final LineNumberReader lnReader = new LineNumberReader(reader);

        try {
            int skip = JSONUtilities.getInt(options, "ignoreLines", -1);
            while (skip > 0) {
                lnReader.readLine();
                skip--;
            }
        } catch (IOException e) {
            // Surface the problem to the caller instead of only printing it.
            exceptions.add(e);
        }
        // The lines have been skipped here already; stop readTable from
        // skipping them a second time.
        JSONUtilities.safePut(options, "ignoreLines", -1);

        TableDataReader dataReader = new TableDataReader() {
            boolean usedColumnNames = false;

            @Override
            public List<Object> getNextRowOfCells() throws IOException {
                if (columnNames != null && !usedColumnNames) {
                    usedColumnNames = true;
                    return columnNames;
                }

                List<Object> cells = null;
                for (int i = 0; i < linesPerRow; i++) {
                    String line = lnReader.readLine();
                    if (i == 0) {
                        if (line == null) {
                            // No more input: signal the end of the table.
                            return null;
                        }
                        cells = new ArrayList<Object>(linesPerRow);
                        cells.add(line);
                    } else if (line != null) {
                        cells.add(line);
                    } else {
                        // Input ended in the middle of a row: return it short.
                        break;
                    }
                }
                return cells;
            }
        };

        readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
    }
}

View File

@ -40,56 +40,44 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Properties;
import java.util.List;
import org.json.JSONObject;
import org.marc4j.MarcPermissiveStreamReader;
import org.marc4j.MarcWriter;
import org.marc4j.MarcXmlWriter;
import org.marc4j.marc.Record;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Project;
public class MarcImporter implements StreamImporter {
public class MarcImporter extends XmlImporter {
@Override
public void read(
InputStream inputStream,
Project project,
ProjectMetadata metadata, Properties options
) throws ImportException {
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
public void parseOneFile(Project project, ProjectMetadata metadata,
ImportingJob job, String fileSource, InputStream inputStream,
ImportColumnGroup rootColumnGroup, int limit, JSONObject options,
List<Exception> exceptions) {
File tempFile;
try {
tempFile = File.createTempFile("refine-import-", ".marc.xml");
} catch (IOException e) {
throw new ImportException("Unexpected error creating temp file",e);
exceptions.add(new ImportException("Unexpected error creating temp file", e));
return;
}
try {
OutputStream os = new FileOutputStream(tempFile);
try {
MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader(
inputStream,
true,
true
);
MarcWriter writer = new MarcXmlWriter(os, true);
int count = 0;
MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader(
inputStream, true, true);
while (reader.hasNext()) {
Record record = reader.next();
if (skip <= 0) {
if (limit == -1 || count < limit) {
writer.write(record);
count++;
} else {
break;
}
} else {
skip--;
}
}
writer.close();
} finally {
@ -102,7 +90,8 @@ public class MarcImporter implements StreamImporter {
InputStream is = new FileInputStream(tempFile);
try {
new XmlImporter().read(is, project, metadata, options);
super.parseOneFile(project, metadata, job, fileSource, inputStream,
rootColumnGroup, limit, options, exceptions);
} finally {
try {
is.close();
@ -111,31 +100,10 @@ public class MarcImporter implements StreamImporter {
}
}
} catch (FileNotFoundException e) {
throw new ImportException("Input file not found", e);
exceptions.add(new ImportException("Input file not found", e));
return;
} finally {
tempFile.delete();
}
}
/**
 * Tells whether the data looks like MARC, judged by content type when one
 * is given and by file name otherwise.
 *
 * @param contentType MIME type of the data, or {@code null} if unknown
 * @param fileName    name of the file; only consulted when
 *                    {@code contentType} is {@code null}
 * @return true for content type "application/marc", or for file names that
 *         end in or contain a ".mrc"/".marc" extension
 */
@Override
public boolean canImportData(String contentType, String fileName) {
    // Case folding uses Locale.ROOT: with the default locale, e.g. Turkish,
    // an upper-case 'I' would not fold to 'i' and matching would fail.
    if (contentType != null) {
        String type = contentType.toLowerCase(java.util.Locale.ROOT).trim();
        return "application/marc".equals(type);
    }
    if (fileName != null) {
        String name = fileName.toLowerCase(java.util.Locale.ROOT);
        return name.endsWith(".mrc")
                || name.endsWith(".marc")
                || name.contains(".mrc.")
                || name.contains(".marc.");
    }
    return false;
}
}

View File

@ -33,64 +33,72 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Map.Entry;
import org.apache.commons.lang.NotImplementedException;
import org.jrdf.JRDFFactory;
import org.jrdf.SortedMemoryJRDFFactory;
import org.jrdf.collection.MemMapFactory;
import org.jrdf.graph.Graph;
import org.jrdf.graph.Triple;
import org.jrdf.parser.ParseException;
import org.jrdf.parser.StatementHandlerException;
import org.jrdf.parser.line.GraphLineParser;
import org.jrdf.parser.line.LineHandler;
import org.jrdf.parser.ntriples.NTriplesParserFactory;
import org.jrdf.util.ClosableIterable;
import org.json.JSONObject;
import static org.jrdf.graph.AnyObjectNode.ANY_OBJECT_NODE;
import static org.jrdf.graph.AnyPredicateNode.ANY_PREDICATE_NODE;
import static org.jrdf.graph.AnySubjectNode.ANY_SUBJECT_NODE;
import com.google.refine.ProjectMetadata;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities;
public class RdfTripleImporter implements ReaderImporter{
public class RdfTripleImporter extends ImportingParserBase {
private JRDFFactory _jrdfFactory;
private NTriplesParserFactory _nTriplesParserFactory;
private MemMapFactory _newMapFactory;
/**
 * Creates the importer and the JRDF helpers it reuses for every file: the
 * graph factory, the N-Triples parser factory and the map factory.
 * Passing false to the base class selects Reader-based (not stream-based)
 * delivery of file content.
 */
public RdfTripleImporter() {
super(false);
_jrdfFactory = SortedMemoryJRDFFactory.getFactory();
_nTriplesParserFactory = new NTriplesParserFactory();
_newMapFactory = new MemMapFactory();
}
@Override
public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) throws ImportException {
String baseUrl = options.getProperty("base-url");
/**
 * Not supported by this importer: always throws NotImplementedException.
 */
public JSONObject createParserUIInitializationData(ImportingJob job,
List<JSONObject> fileRecords, String format) {
throw new NotImplementedException();
}
@Override
public void parseOneFile(Project project, ProjectMetadata metadata,
ImportingJob job, String fileSource, Reader reader, int limit,
JSONObject options, List<Exception> exceptions) {
String baseUrl = JSONUtilities.getString(options, "baseUrl", "");
Graph graph = _jrdfFactory.getNewGraph();
LineHandler lineHandler = _nTriplesParserFactory.createParser(graph, _newMapFactory);
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
try {
parser.parse(reader, baseUrl); // fills JRDF graph
} catch (IOException e) {
throw new ImportException("i/o error while parsing RDF",e);
} catch (ParseException e) {
throw new ImportException("error parsing RDF",e);
} catch (StatementHandlerException e) {
throw new ImportException("error parsing RDF",e);
} catch (Exception e) {
exceptions.add(e);
return;
}
Map<String, List<Row>> subjectToRows = new HashMap<String, List<Row>>();
@ -152,24 +160,4 @@ public class RdfTripleImporter implements ReaderImporter{
triples.iterator().close();
}
}
/**
 * Tells whether the data looks like RDF, judged by content type when one is
 * given and by file name otherwise.
 *
 * @param contentType MIME type of the data, or {@code null} if unknown
 * @param fileName    name of the file; only consulted when
 *                    {@code contentType} is {@code null}
 * @return true for content type "application/rdf+xml" or a file name ending
 *         in ".rdf"
 */
@Override
public boolean canImportData(String contentType, String fileName) {
    // Case folding uses Locale.ROOT: with the default locale, e.g. Turkish,
    // an upper-case 'I' would not fold to 'i' and matching would fail.
    if (contentType != null) {
        String type = contentType.toLowerCase(java.util.Locale.ROOT).trim();
        return "application/rdf+xml".equals(type);
    }
    if (fileName != null) {
        return fileName.toLowerCase(java.util.Locale.ROOT).endsWith(".rdf");
    }
    return false;
}
}

View File

@ -0,0 +1,245 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.json.JSONObject;
import au.com.bytecode.opencsv.CSVParser;
import com.google.refine.ProjectMetadata;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
/**
 * Importer for character-separated text (CSV, TSV and the like), including
 * a heuristic that guesses the separator character from a sample of a file.
 */
public class SeparatorBasedImporter extends TabularImportingParserBase {
    public SeparatorBasedImporter() {
        super(false);
    }

    /**
     * Extends the base options with the defaults of this importer: newline
     * as line separator, the guessed separator (tab when none can be
     * guessed), cell value type guessing and quote processing switched on.
     */
    @Override
    public JSONObject createParserUIInitializationData(ImportingJob job,
            List<JSONObject> fileRecords, String format) {
        JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);

        JSONUtilities.safePut(options, "lineSeparator", "\n");

        String separator = guessSeparator(job, fileRecords);
        JSONUtilities.safePut(options, "separator", separator != null ? separator : "\t");

        JSONUtilities.safePut(options, "guessCellValueTypes", true);
        JSONUtilities.safePut(options, "processQuotes", true);

        return options;
    }

    /**
     * Splits the text into rows of cells with a CSV parser and feeds them to
     * {@code readTable}.
     *
     * Options consulted: "separator" (default tab; only its first character
     * is used) and "processQuotes" (default true). The "lineSeparator"
     * option is currently not consulted.
     */
    @Override
    public void parseOneFile(
        Project project,
        ProjectMetadata metadata,
        ImportingJob job,
        String fileSource,
        Reader reader,
        int limit,
        JSONObject options,
        List<Exception> exceptions
    ) {
        String sep = JSONUtilities.getString(options, "separator", "\t");
        if (sep == null || sep.length() == 0) {
            // An empty separator has no first character to use; fall back to
            // the default instead of failing with an index error.
            sep = "\t";
        }
        boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true);

        final CSVParser parser = new CSVParser(
            sep.charAt(0), // HACK: only the first character is used - multi-char separators won't work.
            CSVParser.DEFAULT_QUOTE_CHARACTER,
            (char) 0, // escape character
            CSVParser.DEFAULT_STRICT_QUOTES,
            CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
            !processQuotes);

        final LineNumberReader lnReader = new LineNumberReader(reader);

        TableDataReader dataReader = new TableDataReader() {
            @Override
            public List<Object> getNextRowOfCells() throws IOException {
                String line = lnReader.readLine();
                if (line == null) {
                    return null;
                }
                return getCells(line, parser, lnReader);
            }
        };

        readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
    }

    /**
     * Parses one logical record into cells. When the parser reports that the
     * record is still pending after {@code line}, further lines are pulled
     * from {@code lnReader} until it is complete.
     *
     * @param line     first physical line of the record
     * @param parser   CSV parser holding the multi-line state
     * @param lnReader source of continuation lines
     * @return the cells of the record, as strings
     * @throws IOException if a line cannot be read or parsed
     */
    static protected ArrayList<Object> getCells(String line, CSVParser parser, LineNumberReader lnReader)
            throws IOException {
        ArrayList<Object> cells = new ArrayList<Object>();

        String[] tokens = parser.parseLineMulti(line);
        for (String token : tokens) {
            cells.add(token);
        }

        while (parser.isPending()) {
            tokens = parser.parseLineMulti(lnReader.readLine());
            for (String token : tokens) {
                cells.add(token);
            }
        }
        return cells;
    }

    /**
     * Guesses the separator from the first files of a job (at most five are
     * tried, and records without a "location" are skipped).
     *
     * @return the separator of the first file for which a guess succeeds, as
     *         a one-character string, or {@code null} if none succeeds
     */
    static public String guessSeparator(ImportingJob job, List<JSONObject> fileRecords) {
        for (int i = 0; i < 5 && i < fileRecords.size(); i++) {
            JSONObject fileRecord = fileRecords.get(i);
            String encoding = ImportingUtilities.getEncoding(fileRecord);
            String location = JSONUtilities.getString(fileRecord, "location", null);

            if (location != null) {
                File file = new File(job.getRawDataDir(), location);
                Separator separator = guessSeparator(file, encoding);
                if (separator != null) {
                    return Character.toString(separator.separator);
                }
            }
        }
        return null;
    }

    /** Per-candidate statistics gathered while guessing the separator. */
    static public class Separator {
        char separator;
        int totalCount = 0;
        int totalOfSquaredCount = 0;
        int currentLineCount = 0;

        double averagePerLine;
        double stddev;
    }

    /**
     * Guesses the separator character of a file from a sample of its lines.
     *
     * Up to 100 non-empty lines (and roughly 64K characters) are examined.
     * Every character that is neither a letter nor a digit nor one of
     * {@code " ' . -} or a space is a candidate. For each candidate the mean
     * and standard deviation of its occurrences per line are computed; the
     * candidates are tried in order of increasing deviation and the first
     * whose deviation is below 10% of its mean is returned.
     *
     * @param file     file to sample
     * @param encoding character encoding, or {@code null} for the platform default
     * @return the winning candidate, or {@code null} if no candidate is
     *         regular enough or the file cannot be read
     */
    static public Separator guessSeparator(File file, String encoding) {
        try {
            InputStream is = new FileInputStream(file);
            try {
                Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
                LineNumberReader lineNumberReader = new LineNumberReader(reader);

                List<Separator> separators = new ArrayList<SeparatorBasedImporter.Separator>();
                Map<Character, Separator> separatorMap = new HashMap<Character, SeparatorBasedImporter.Separator>();

                int totalBytes = 0;
                int lineCount = 0;
                String s;
                while (totalBytes < 64 * 1024 &&
                       lineCount < 100 &&
                       (s = lineNumberReader.readLine()) != null) {

                    totalBytes += s.length() + 1; // count the new line character
                    if (s.length() == 0) {
                        continue;
                    }
                    lineCount++;

                    // Count the candidate characters on this line.
                    for (int i = 0; i < s.length(); i++) {
                        char c = s.charAt(i);
                        if (!Character.isLetterOrDigit(c) && "\"' .-".indexOf(c) < 0) {
                            Separator separator = separatorMap.get(c);
                            if (separator == null) {
                                separator = new Separator();
                                separator.separator = c;

                                separatorMap.put(c, separator);
                                separators.add(separator);
                            }
                            separator.currentLineCount++;
                        }
                    }

                    // Fold this line's counts into the running sums.
                    for (Separator separator : separators) {
                        separator.totalCount += separator.currentLineCount;
                        separator.totalOfSquaredCount += separator.currentLineCount * separator.currentLineCount;
                        separator.currentLineCount = 0;
                    }
                }

                if (separators.size() > 0) {
                    for (Separator separator : separators) {
                        separator.averagePerLine = separator.totalCount / (double) lineCount;
                        separator.stddev = Math.sqrt(
                            separator.totalOfSquaredCount / (double) lineCount -
                            separator.averagePerLine * separator.averagePerLine);
                    }

                    // Most regular candidates first.
                    Collections.sort(separators, new Comparator<Separator>() {
                        @Override
                        public int compare(Separator sep0, Separator sep1) {
                            return Double.compare(sep0.stddev, sep1.stddev);
                        }
                    });
                    for (Separator separator : separators) {
                        if (separator.stddev / separator.averagePerLine < 0.1) {
                            return separator;
                        }
                    }
                }
            } finally {
                is.close();
            }
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }
}

View File

@ -0,0 +1,205 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.json.JSONObject;
import com.google.refine.ProjectMetadata;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities;
/**
 * Base class for importing parsers whose input can be consumed as a sequence
 * of rows, each row being a list of cell values. Subclasses supply the rows
 * through a {@link TableDataReader}; {@link #readTable} turns them into
 * project columns and rows.
 */
abstract public class TabularImportingParserBase extends ImportingParserBase {

    /** Supplies table rows one at a time. */
    static public interface TableDataReader {
        /**
         * @return the cells of the next row; {@code readTable} stops reading
         *         as soon as this returns null
         * @throws IOException if the underlying source cannot be read
         */
        public List<Object> getNextRowOfCells() throws IOException;
    }

    protected TabularImportingParserBase(boolean useInputStream) {
        super(useInputStream);
    }

    /**
     * Builds the default option set for tabular parsers. File sources are
     * included by default only when more than one file record is given.
     */
    @Override
    public JSONObject createParserUIInitializationData(ImportingJob job,
            List<JSONObject> fileRecords, String format) {
        JSONObject defaults = new JSONObject();

        JSONUtilities.safePut(defaults, "ignoreLines", -1);  // leading rows to drop before the header
        JSONUtilities.safePut(defaults, "headerLines", 1);   // rows that make up the column names
        JSONUtilities.safePut(defaults, "skipDataLines", 0); // leading data rows to skip
        JSONUtilities.safePut(defaults, "storeBlankRows", true);
        JSONUtilities.safePut(defaults, "storeBlankCellsAsNulls", true);
        JSONUtilities.safePut(defaults, "includeFileSources", fileRecords.size() > 1);

        return defaults;
    }

    /**
     * Reads rows from {@code reader} into {@code project}.
     *
     * Rows are consumed in this order: first {@code ignoreLines} rows are
     * dropped, then {@code headerLines} rows are merged into column names,
     * and the rest are data rows, of which the first {@code skipDataLines}
     * are skipped. Reading stops when the reader returns null, the job is
     * canceled, or the project holds at least the effective row limit (the
     * smaller positive value of {@code limit} and the "limit" option).
     *
     * @param fileSource value stored in the "File" column when the
     *                   "includeFileSources" option is set
     * @param exceptions receives any IOException raised by the reader
     */
    protected void readTable(
        Project project,
        ProjectMetadata metadata,
        ImportingJob job,
        TableDataReader reader,
        String fileSource,
        int limit,
        JSONObject options,
        List<Exception> exceptions
    ) {
        int linesToIgnore = JSONUtilities.getInt(options, "ignoreLines", -1);
        int headerLinesLeft = JSONUtilities.getInt(options, "headerLines", 1);
        int dataLinesToSkip = JSONUtilities.getInt(options, "skipDataLines", 0);

        int rowLimit = JSONUtilities.getInt(options, "limit", -1);
        if (limit > 0) {
            rowLimit = (rowLimit > 0) ? Math.min(limit, rowLimit) : limit;
        }

        boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
        boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true);
        boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true);
        boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);

        String fileNameColumnName = "File";
        if (includeFileSources && project.columnModel.getColumnByName(fileNameColumnName) == null) {
            try {
                project.columnModel.addColumn(
                    0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
            } catch (ModelException e) {
                // Ignore: We already checked for duplicate name.
            }
        }

        List<String> columnNames = new ArrayList<String>();
        int rowsWithData = 0;

        try {
            while (!job.canceled) {
                List<Object> cells = reader.getNextRowOfCells();
                if (cells == null) {
                    break;
                }

                if (linesToIgnore > 0) {
                    linesToIgnore--;
                    continue;
                }

                if (headerLinesLeft > 0) {
                    // Header row: a column is registered even for a blank cell.
                    for (int c = 0; c < cells.size(); c++) {
                        ImporterUtilities.appendColumnName(columnNames, c, headerCellToColumnName(cells.get(c)));
                    }

                    headerLinesLeft--;
                    if (headerLinesLeft == 0) {
                        ImporterUtilities.setupColumns(project, columnNames);
                    }
                    continue;
                }

                // Data row.
                Row row = new Row(columnNames.size());

                if (storeBlankRows || cells.size() > 0) {
                    rowsWithData++;
                }
                if (dataLinesToSkip > 0 && rowsWithData <= dataLinesToSkip) {
                    continue;
                }

                boolean rowHasData = false;
                for (int c = 0; c < cells.size(); c++) {
                    Column column = ImporterUtilities.getOrAllocateColumn(project, columnNames, c);
                    Object value = cells.get(c);

                    if (value instanceof Cell) {
                        row.setCell(column.getCellIndex(), (Cell) value);
                        rowHasData = true;
                    } else if (ExpressionUtils.isNonBlankData(value)) {
                        Serializable storedValue;
                        if (!(value instanceof String)) {
                            storedValue = ExpressionUtils.wrapStorable(value);
                        } else if (guessCellValueTypes) {
                            storedValue = ImporterUtilities.parseCellValue((String) value);
                        } else {
                            storedValue = (String) value;
                        }

                        row.setCell(column.getCellIndex(), new Cell(storedValue, null));
                        rowHasData = true;
                    } else if (!storeBlankCellsAsNulls) {
                        row.setCell(column.getCellIndex(), new Cell("", null));
                    }
                }

                if (rowHasData || storeBlankRows) {
                    if (includeFileSources) {
                        row.setCell(
                            project.columnModel.getColumnByName(fileNameColumnName).getCellIndex(),
                            new Cell(fileSource, null));
                    }
                    project.rows.add(row);
                }

                if (rowLimit > 0 && project.rows.size() >= rowLimit) {
                    break;
                }
            }
        } catch (IOException e) {
            exceptions.add(e);
        }
    }

    /** Turns one header cell into a trimmed column name; a null cell yields "". */
    private static String headerCellToColumnName(Object cell) {
        if (cell == null) {
            return "";
        }
        Object named = (cell instanceof Cell) ? ((Cell) cell).value : cell;
        return named.toString().trim();
    }
}

View File

@ -0,0 +1,63 @@
package com.google.refine.importers;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.CharBuffer;
import com.google.refine.importing.FormatGuesser;
/**
 * Format guesser that recognises plain line-based text by counting line
 * breaks in the first part of a file.
 */
public class TextFormatGuesser implements FormatGuesser {

    /**
     * Samples up to about 64K characters of {@code file} and reports
     * "text/line-based" when more than three line breaks are found.
     *
     * @param file       file to inspect
     * @param encoding   character encoding name, or null for the platform default
     * @param seedFormat not used by this guesser
     * @return "text/line-based", or null when the sample has three or fewer
     *         line breaks or the file cannot be read (I/O problems are
     *         printed to stderr, not rethrown)
     */
    @Override
    public String guess(File file, String encoding, String seedFormat) {
        try {
            InputStream is = new FileInputStream(file);
            try {
                Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);

                int totalChars = 0;
                int charsRead;
                int lineBreaks = 0;

                CharBuffer charBuffer = CharBuffer.allocate(4096);
                while (totalChars < 64 * 1024 && (charsRead = reader.read(charBuffer)) > 0) {
                    // read() leaves the position just past the new data, and
                    // toString() only covers position..limit. Flip first so
                    // the string is exactly the characters just read rather
                    // than the unread remainder of the buffer.
                    charBuffer.flip();
                    lineBreaks += countSubstrings(charBuffer.toString(), "\n");

                    charBuffer.clear();
                    totalChars += charsRead;
                }

                if (lineBreaks > 3) {
                    return "text/line-based";
                }
            } finally {
                is.close();
            }
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Counts non-overlapping occurrences of {@code sub} in {@code s},
     * scanning left to right.
     *
     * @param s   text to search
     * @param sub text to look for
     * @return the number of occurrences; 0 when either argument is null or
     *         empty (an empty {@code sub} would otherwise never advance the
     *         scan position)
     */
    static public int countSubstrings(String s, String sub) {
        if (s == null || sub == null || sub.length() == 0) {
            return 0;
        }

        int count = 0;
        int from = 0;
        while (from < s.length()) {
            int i = s.indexOf(sub, from);
            if (i < 0) {
                break;
            }
            from = i + sub.length();
            count++;
        }
        return count;
    }
}

View File

@ -1,238 +0,0 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.apache.commons.lang.StringUtils;
import au.com.bytecode.opencsv.CSVParser;
import com.google.refine.ProjectMetadata;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
/**
 * Importer for tab-separated and comma-separated text.
 */
public class TsvCsvImporter implements ReaderImporter,StreamImporter {

    /**
     * Reads the import settings from {@code options} and delegates to the
     * {@link LineNumberReader} overload. Any I/O failure is rethrown as an
     * {@link ImportException}.
     */
    @Override
    public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) throws ImportException {
        boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
        String separator = options.getProperty("separator"); // auto-detect if not present
        int linesToIgnore = ImporterUtilities.getIntegerOption("ignore", options, -1);
        int headerLineCount = ImporterUtilities.getIntegerOption("header-lines", options, 1);
        int rowLimit = ImporterUtilities.getIntegerOption("limit",options,-1);
        int rowsToSkip = ImporterUtilities.getIntegerOption("skip",options,0);
        boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true);
        boolean ignoreQuotes = ImporterUtilities.getBooleanOption("ignore-quotes", options, false);

        try {
            read(new LineNumberReader(reader), project, separator,
                rowLimit, rowsToSkip, linesToIgnore, headerLineCount,
                guessValueType, splitIntoColumns, ignoreQuotes);
        } catch (IOException e) {
            throw new ImportException("Import failed",e);
        }
    }

    /**
     * Parses delimited text line by line into {@code project}.
     *
     * Blank lines are dropped. When no usable separator is supplied (or
     * splitting is off), the first non-blank line decides: tab if it
     * contains one, comma otherwise.
     *
     * @param lnReader         source of lines
     * @param project          project that receives the rows and columns
     * @param sep              separator; only its first character is used
     * @param limit            stop once the project has this many rows (ignored when not positive)
     * @param skip             number of leading data rows to skip
     * @param ignoreLines      number of leading lines dropped before anything else
     * @param headerLines      number of lines that contribute column names
     * @param guessValueType   whether cell text is run through ImporterUtilities.parseCellValue
     * @param splitIntoColumns whether lines are split into columns at all
     * @param ignoreQuotes     passed through to the CSV parser
     * @throws IOException if reading from {@code lnReader} fails
     */
    public void read(LineNumberReader lnReader, Project project, String sep, int limit, int skip, int ignoreLines, int headerLines, boolean guessValueType, boolean splitIntoColumns, boolean ignoreQuotes ) throws IOException{
        CSVParser parser = null;
        if (splitIntoColumns && sep != null && sep.length() > 0) {
            // HACK: only the first character is used - multi-char separators won't work.
            parser = newParser(sep.charAt(0), ignoreQuotes);
        }

        List<String> columnNames = new ArrayList<String>();
        int rowsWithData = 0;

        for (String line = lnReader.readLine(); line != null; line = lnReader.readLine()) {
            if (ignoreLines > 0) {
                ignoreLines--;
                continue;
            }
            if (StringUtils.isBlank(line)) {
                continue;
            }

            if (parser == null) {
                // Guess the separator from the first usable line.
                parser = newParser(line.indexOf('\t') >= 0 ? '\t' : ',', ignoreQuotes);
            }

            if (headerLines > 0) {
                headerLines--;

                ArrayList<String> headerCells = getCells(line, parser, lnReader, splitIntoColumns);
                for (int c = 0; c < headerCells.size(); c++) {
                    // A column is registered even when the header cell is blank.
                    ImporterUtilities.appendColumnName(columnNames, c, headerCells.get(c).trim());
                }
                continue;
            }

            Row row = new Row(columnNames.size());
            ArrayList<String> cells = getCells(line, parser, lnReader, splitIntoColumns);

            if (cells != null && cells.size() > 0) {
                rowsWithData++;
            }
            if (skip > 0 && rowsWithData <= skip) {
                continue;
            }

            for (String text : cells) {
                Cell cell = null;
                if (ExpressionUtils.isNonBlankData(text)) {
                    Serializable value = guessValueType ? ImporterUtilities.parseCellValue(text) : text;
                    cell = new Cell(value, null);
                }
                row.cells.add(cell);
            }

            project.rows.add(row);
            project.columnModel.setMaxCellIndex(row.cells.size());
            ImporterUtilities.ensureColumnsInRowExist(columnNames, row);

            if (limit > 0 && project.rows.size() >= limit) {
                break;
            }
        }

        ImporterUtilities.setupColumns(project, columnNames);
    }

    /** Creates a parser for {@code separator} with the default quote character and no escape character. */
    private static CSVParser newParser(char separator, boolean ignoreQuotes) {
        return new CSVParser(separator,
            CSVParser.DEFAULT_QUOTE_CHARACTER,
            (char) 0, // escape character
            CSVParser.DEFAULT_STRICT_QUOTES,
            CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
            ignoreQuotes);
    }

    /**
     * Splits one logical record into cells. While the parser reports a
     * pending (unfinished) field, further lines are pulled from
     * {@code lnReader} and appended. Without splitting, the whole line is
     * the single cell.
     */
    protected ArrayList<String> getCells(String line, CSVParser parser, LineNumberReader lnReader, boolean splitIntoColumns) throws IOException{
        ArrayList<String> cells = new ArrayList<String>();
        if (!splitIntoColumns) {
            cells.add(line);
            return cells;
        }

        String[] tokens = parser.parseLineMulti(line);
        while (true) {
            for (String token : tokens) {
                cells.add(token);
            }
            if (!parser.isPending()) {
                break;
            }
            tokens = parser.parseLineMulti(lnReader.readLine());
        }
        return cells;
    }

    /** Reads from a byte stream using the platform default character encoding. */
    @Override
    public void read(InputStream inputStream, Project project,
            ProjectMetadata metadata, Properties options) throws ImportException {
        read(new InputStreamReader(inputStream), project, metadata, options);
    }

    /**
     * Decides by content type when one is given; the file name extension
     * (.tsv or .csv) is consulted only when the content type is null.
     */
    @Override
    public boolean canImportData(String contentType, String fileName) {
        if (contentType != null) {
            String normalizedType = contentType.toLowerCase().trim();
            return "text/plain".equals(normalizedType) ||
                "text/csv".equals(normalizedType) ||
                "text/x-csv".equals(normalizedType) ||
                "text/tab-separated-value".equals(normalizedType);
        }
        if (fileName == null) {
            return false;
        }
        String loweredName = fileName.toLowerCase();
        return loweredName.endsWith(".tsv") || loweredName.endsWith(".csv");
    }
}

View File

@ -33,99 +33,274 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importers;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.Properties;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.servlet.ServletException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.json.JSONArray;
import org.json.JSONObject;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.XmlParser;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.TreeImportingParserBase;
import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
public class XmlImporter implements StreamImporter {
public class XmlImporter extends TreeImportingParserBase {
public XmlImporter() {
super(true);
}
final static Logger logger = LoggerFactory.getLogger("XmlImporter");
// Mutable counter of StAX tokens consumed while building the preview; it is
// passed into descendElement so the recursive walk shares one running total
// that is checked against PREVIEW_PARSING_LIMIT.
static private class PreviewParsingState {
int tokenCount;
}
public static final int BUFFER_SIZE = 64 * 1024;
final static private int PREVIEW_PARSING_LIMIT = 1000;
// NOTE(review): this span is a merged diff hunk. It interleaves the body of
// read(InputStream, Project, ProjectMetadata, Properties) with the body of
// createParserUIInitializationData(...), so it does not compile as shown.
// Resolve it to one side before changing any code here.
@Override
public void read(
InputStream inputStream,
Project project,
ProjectMetadata metadata, Properties options
) throws ImportException {
logger.trace("XmlImporter.read");
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
String[] recordPath = null;
{
byte[] buffer = new byte[BUFFER_SIZE];
int bytes_read = 0;
try {//fill the buffer with data
while (bytes_read < BUFFER_SIZE) {
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
if (c == -1) break;
bytes_read +=c ;
}
pis.unread(buffer, 0, bytes_read);
} catch (IOException e) {
throw new ImportException("Read error",e);
}
InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read);
TreeParser parser = new XmlParser(iStream);
if (options.containsKey("importer-record-tag")) {
// createParserUIInitializationData: starts from the superclass options, then
// scans at most PREVIEW_PARSING_LIMIT StAX tokens of the first file record
// and stores the first start element, as rendered by descendElement, under
// the "dom" key.
public JSONObject createParserUIInitializationData(
ImportingJob job, List<JSONObject> fileRecords, String format) {
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
try {
recordPath = XmlImportUtilities.detectPathFromTag(
parser,
options.getProperty("importer-record-tag"));
}catch(Exception e){
// silent
// e.printStackTrace();
JSONObject firstFileRecord = fileRecords.get(0);
File file = ImportingUtilities.getFile(job, firstFileRecord);
InputStream is = new FileInputStream(file);
try {
XMLStreamReader parser = createXMLStreamReader(is);
PreviewParsingState state = new PreviewParsingState();
while (parser.hasNext() && state.tokenCount < PREVIEW_PARSING_LIMIT) {
int tokenType = parser.next();
state.tokenCount++;
if (tokenType == XMLStreamConstants.START_ELEMENT) {
JSONObject rootElement = descendElement(parser, state);
if (rootElement != null) {
JSONUtilities.safePut(options, "dom", rootElement);
break;
}
} else {
recordPath = XmlImportUtilities.detectRecordElement(parser);
// ignore everything else
}
}
} finally {
is.close();
}
} catch (XMLStreamException e) {
// Ignore
} catch (IOException e) {
// Ignore
}
// The options are returned even when the preview could not be built; in that
// case they simply carry no "dom" entry.
return options;
}
/**
 * Converts the element the parser is currently positioned on, and its
 * descendants, into a JSON preview node.
 *
 * Keys written: "n" local name, "p" prefix (when non-null), "uri" namespace
 * URI (when non-null), "ns" array of declared namespaces ({"p", "uri"}),
 * "a" array of attributes ({"n", "v", optional "p"}) and "c" array of
 * children, where a child is either a nested element node or {"t": text}.
 *
 * Reading stops at the matching END_ELEMENT or once state.tokenCount reaches
 * PREVIEW_PARSING_LIMIT, whichever comes first.
 *
 * NOTE(review): this span is a merged diff hunk. The statements that mention
 * recordPath, ImportColumnGroup, pis and project.columnModel do not belong to
 * this method; the span does not compile as shown.
 */
final static private JSONObject descendElement(XMLStreamReader parser, PreviewParsingState state) throws XMLStreamException {
JSONObject result = new JSONObject();
{
String name = parser.getLocalName();
JSONUtilities.safePut(result, "n", name);
String prefix = parser.getPrefix();
if (prefix != null) {
JSONUtilities.safePut(result, "p", prefix);
}
String nsUri = parser.getNamespaceURI();
if (nsUri != null) {
JSONUtilities.safePut(result, "uri", nsUri);
}
}
if (recordPath == null)
return;
int namespaceCount = parser.getNamespaceCount();
if (namespaceCount > 0) {
JSONArray namespaces = new JSONArray();
JSONUtilities.safePut(result, "ns", namespaces);
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
XmlImportUtilities.importTreeData(new XmlParser(pis), project, recordPath, rootColumnGroup);
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
for (int i = 0; i < namespaceCount; i++) {
JSONObject namespace = new JSONObject();
JSONUtilities.append(namespaces, namespace);
JSONUtilities.safePut(namespace, "p", parser.getNamespacePrefix(i));
JSONUtilities.safePut(namespace, "uri", parser.getNamespaceURI(i));
}
}
project.columnModel.update();
int attributeCount = parser.getAttributeCount();
if (attributeCount > 0) {
JSONArray attributes = new JSONArray();
JSONUtilities.safePut(result, "a", attributes);
for (int i = 0; i < attributeCount; i++) {
JSONObject attribute = new JSONObject();
JSONUtilities.append(attributes, attribute);
JSONUtilities.safePut(attribute, "n", parser.getAttributeLocalName(i));
JSONUtilities.safePut(attribute, "v", parser.getAttributeValue(i));
String prefix = parser.getAttributePrefix(i);
if (prefix != null) {
JSONUtilities.safePut(attribute, "p", prefix);
}
}
}
// Walk the element's content; the shared token counter bounds the total work
// across all recursion levels.
JSONArray children = new JSONArray();
while (parser.hasNext() && state.tokenCount < PREVIEW_PARSING_LIMIT) {
int tokenType = parser.next();
state.tokenCount++;
if (tokenType == XMLStreamConstants.END_ELEMENT) {
break;
} else if (tokenType == XMLStreamConstants.START_ELEMENT) {
JSONObject childElement = descendElement(parser, state);
if (childElement != null) {
JSONUtilities.append(children, childElement);
}
} else if (tokenType == XMLStreamConstants.CHARACTERS ||
tokenType == XMLStreamConstants.CDATA ||
tokenType == XMLStreamConstants.SPACE) {
JSONObject childElement = new JSONObject();
JSONUtilities.safePut(childElement, "t", parser.getText());
JSONUtilities.append(children, childElement);
} else {
// ignore everything else
}
}
// "c" is omitted entirely for elements without children.
if (children.length() > 0) {
JSONUtilities.safePut(result, "c", children);
}
return result;
}
// NOTE(review): this span is a merged diff hunk. It interleaves the body of
// canImportData(String, String) with the body of parseOneFile(...), so it
// does not compile as shown. Resolve it to one side before changing any code
// here.
@Override
public boolean canImportData(String contentType, String fileName) {
if (contentType != null) {
contentType = contentType.toLowerCase().trim();
// parseOneFile: wraps the raw stream in an XmlParser and hands it to the
// parseOneFile overload that takes a reader; a failure to create the XML
// reader is recorded in `exceptions` instead of being thrown.
public void parseOneFile(Project project, ProjectMetadata metadata,
ImportingJob job, String fileSource, InputStream inputStream,
ImportColumnGroup rootColumnGroup, int limit, JSONObject options,
List<Exception> exceptions) {
if("application/xml".equals(contentType) ||
"text/xml".equals(contentType) ||
"application/rss+xml".equals(contentType) ||
"application/atom+xml".equals(contentType)) {
return true;
}
} else if (fileName != null) {
fileName = fileName.toLowerCase();
if (
fileName.endsWith(".xml") ||
fileName.endsWith(".atom") ||
fileName.endsWith(".rss")
) {
return true;
try {
parseOneFile(project, metadata, job, fileSource,
new XmlParser(inputStream), rootColumnGroup, limit, options, exceptions);
} catch (XMLStreamException e) {
exceptions.add(e);
}
}
return false;
/**
 * {@link TreeReader} backed by a StAX {@link XMLStreamReader}: StAX event
 * codes are translated into tree tokens and the accessors delegate to the
 * underlying reader.
 */
static public class XmlParser implements TreeReader {
    final protected XMLStreamReader parser;

    /**
     * @param inputStream XML document to read
     * @throws XMLStreamException if the StAX reader cannot be created
     */
    public XmlParser(InputStream inputStream) throws XMLStreamException {
        parser = createXMLStreamReader(inputStream);
    }

    /**
     * Advances to the next event.
     *
     * @throws ServletException when the stream is exhausted ("End of XML
     *         stream") or when the underlying reader fails
     */
    @Override
    public Token next() throws ServletException {
        int eventCode;
        try {
            if (!parser.hasNext()) {
                throw new ServletException("End of XML stream");
            }
            eventCode = parser.next();
        } catch (XMLStreamException e) {
            throw new ServletException(e);
        }
        return mapToToken(eventCode);
    }

    /**
     * Translates a StAX event code: element start/end become entity
     * start/end, CHARACTERS and SPACE become values, and every other code
     * (including CDATA) is ignorable.
     */
    protected Token mapToToken(int token) throws ServletException {
        switch (token) {
            case XMLStreamConstants.START_ELEMENT:
                return Token.StartEntity;
            case XMLStreamConstants.END_ELEMENT:
                return Token.EndEntity;
            case XMLStreamConstants.CHARACTERS:
            case XMLStreamConstants.SPACE:
                return Token.Value;
            default:
                return Token.Ignorable;
        }
    }

    /** Token for the event the reader is currently positioned on. */
    @Override
    public Token current() throws ServletException{
        return this.mapToToken(parser.getEventType());
    }

    @Override
    public boolean hasNext() throws ServletException{
        try {
            return parser.hasNext();
        } catch (XMLStreamException e) {
            throw new ServletException(e);
        }
    }

    /** Local name of the current element, or null when the reader rejects the call in its current state. */
    @Override
    public String getFieldName() throws ServletException{
        String localName;
        try {
            localName = parser.getLocalName();
        } catch (IllegalStateException e) {
            localName = null;
        }
        return localName;
    }

    @Override
    public String getPrefix(){
        return parser.getPrefix();
    }

    @Override
    public String getFieldValue(){
        return parser.getText();
    }

    @Override
    public int getAttributeCount(){
        return parser.getAttributeCount();
    }

    @Override
    public String getAttributeValue(int index){
        return parser.getAttributeValue(index);
    }

    @Override
    public String getAttributePrefix(int index){
        return parser.getAttributePrefix(index);
    }

    @Override
    public String getAttributeLocalName(int index){
        return parser.getAttributeLocalName(index);
    }
}
/**
 * Creates the StAX reader used for both previewing and importing XML.
 *
 * Adjacent character data is coalesced and internal entity references are
 * replaced by their text. External entities are switched off: imported
 * files are untrusted, and resolving them would let a document pull in
 * local files or remote URLs (XXE). DTD support itself stays on so that
 * documents declaring internal entities still parse.
 *
 * @param inputStream XML document to read
 * @return a reader positioned at the start of the document
 * @throws XMLStreamException if the reader cannot be created
 */
final static private XMLStreamReader createXMLStreamReader(InputStream inputStream) throws XMLStreamException {
    XMLInputFactory factory = XMLInputFactory.newInstance();
    factory.setProperty(XMLInputFactory.IS_COALESCING, true);
    factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true);
    factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
    return factory.createXMLStreamReader(inputStream);
}
}

View File

@ -1,210 +0,0 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers.parsers;
import java.io.IOException;
import java.io.InputStream;
import javax.servlet.ServletException;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * {@link TreeParser} backed by a Jackson streaming {@link JsonParser}.
 * Objects and arrays are presented as entities and scalars as values; JSON
 * has no attributes or prefixes, so those accessors return empty results.
 */
public class JSONParser implements TreeParser{
    final static Logger logger = LoggerFactory.getLogger("JsonParser");

    JsonFactory factory = new JsonFactory();
    JsonParser parser = null;

    // Workaround state for the Jackson parser: it reports no current name
    // on the START_OBJECT/START_ARRAY that follows a field name, so the last
    // field name seen is remembered and handed out by getLocalName().
    Boolean lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
    Boolean thisTokenIsAFieldName = false;
    String lastFieldName = null;

    /**
     * Creates a parser over {@code inputStream}. A failure to create it is
     * printed to stderr and leaves {@code parser} null.
     */
    public JSONParser(InputStream inputStream){
        try {
            parser = factory.createJsonParser(inputStream);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Always 0: all JSON is treated as elements. */
    @Override
    public int getAttributeCount() {
        return 0;
    }

    /** Always null: all JSON is treated as elements. */
    @Override
    public String getAttributeLocalName(int index) {
        return null;
    }

    /** Always null: all JSON is treated as elements. */
    @Override
    public String getAttributePrefix(int index) {
        return null;
    }

    /** Always null: all JSON is treated as elements. */
    @Override
    public String getAttributeValue(int index) {
        return null;
    }

    /** Tree token for the parser's current JSON token. */
    @Override
    public TreeParserToken getEventType() throws ServletException {
        return this.mapToTreeParserToken(parser.getCurrentToken());
    }

    /**
     * Name of the current field. When Jackson reports none, the remembered
     * field name is used if the current token opens the value of a field;
     * otherwise the placeholder "__anonymous__" is returned.
     */
    @Override
    public String getLocalName() throws ServletException{
        try {
            String name = parser.getCurrentName();
            if (name != null) {
                return name;
            }
            return this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity
                ? this.lastFieldName
                : "__anonymous__";
        } catch (Exception e) {
            throw new ServletException(e);
        }
    }

    /** Always null: JSON does not have prefixes. */
    @Override
    public String getPrefix() {
        return null;
    }

    @Override
    public String getText() throws ServletException {
        try {
            return parser.getText();
        } catch (Exception e) {
            throw new ServletException(e);
        }
    }

    /**
     * Always true; the end of input is instead signalled by {@link #next()}
     * throwing.
     */
    @Override
    public boolean hasNext() throws ServletException {
        return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?)
    }

    /**
     * Advances to the next JSON token and updates the field-name workaround
     * state.
     *
     * @throws ServletException when the parser fails or no tokens remain
     */
    @Override
    public TreeParserToken next() throws ServletException {
        JsonToken token;
        try {
            token = parser.nextToken();
        } catch (JsonParseException e) {
            throw new ServletException(e);
        } catch (IOException e) {
            throw new ServletException(e);
        }

        if (token == null) {
            throw new ServletException("No more Json Tokens in stream");
        }

        switch (token) {
            case FIELD_NAME:
                try {
                    this.thisTokenIsAFieldName = true;
                    this.lastFieldName = parser.getCurrentName();
                } catch (Exception e) {
                    //silent
                }
                break;

            case START_ARRAY:
            case START_OBJECT: {
                boolean opensFieldValue = this.thisTokenIsAFieldName;
                this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = opensFieldValue;
                if (opensFieldValue) {
                    this.thisTokenIsAFieldName = false;
                } else {
                    this.lastFieldName = null;
                }
                break;
            }

            default:
                this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
                this.lastFieldName = null;
                this.thisTokenIsAFieldName = false;
                break;
        }

        return mapToTreeParserToken(token);
    }

    /**
     * Translates a Jackson token: array/object start and end become entity
     * start and end, scalar values (string, numbers, booleans, null) become
     * values, and everything else - including FIELD_NAME, which is exposed
     * through getLocalName() - is ignorable.
     */
    protected TreeParserToken mapToTreeParserToken(JsonToken token){
        switch (token) {
            case START_ARRAY:
            case START_OBJECT:
                return TreeParserToken.StartEntity;

            case END_ARRAY:
            case END_OBJECT:
                return TreeParserToken.EndEntity;

            case VALUE_STRING:
            case VALUE_NUMBER_INT:
            case VALUE_NUMBER_FLOAT:
            case VALUE_TRUE:
            case VALUE_FALSE:
            case VALUE_NULL:
                return TreeParserToken.Value;

            default:
                return TreeParserToken.Ignorable;
        }
    }
}

View File

@ -1,70 +0,0 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers.parsers;
import java.io.LineNumberReader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import com.google.refine.importers.ImporterUtilities;
import com.google.refine.model.Cell;
import com.google.refine.model.Row;
/**
 * Row parser that never splits: every input line becomes at most one cell.
 */
public class NonSplitRowParser extends RowParser {

    /**
     * Wraps the trimmed line in a single-element list.
     *
     * @param line       the raw line of text
     * @param lineReader not used by this implementation
     * @return a list holding exactly one entry, the trimmed line
     */
    public List<String> split(String line, LineNumberReader lineReader) {
        List<String> single = new ArrayList<String>(1);
        single.add(line.trim());
        return single;
    }

    /**
     * Appends the line to {@code row} as one cell.
     *
     * @param row            the row whose {@code cells} list receives the cell
     * @param line           the raw line of text
     * @param guessValueType when true the cell value is obtained from
     *                       {@code ImporterUtilities.parseCellValue(line)};
     *                       otherwise the raw line itself is stored
     * @param lineReader     not used by this implementation
     * @return true when a non-null value was stored; false for a line that is
     *         empty after trimming (the row is left untouched) or when the
     *         value turned out to be null (a null cell is appended)
     */
    public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) {
        // Whitespace-only lines contribute nothing to the row.
        if (line.trim().isEmpty()) {
            return false;
        }

        Serializable cellValue = line;
        if (guessValueType) {
            cellValue = ImporterUtilities.parseCellValue(line);
        }

        if (cellValue == null) {
            // Keep the column position occupied even though there is no data.
            row.cells.add(null);
            return false;
        }

        row.cells.add(new Cell(cellValue, null));
        return true;
    }
}

View File

@ -1,85 +0,0 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers.parsers;
import java.io.LineNumberReader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.importers.ImporterUtilities;
import com.google.refine.model.Cell;
import com.google.refine.model.Row;
/**
 * Row parser that cuts each line into cells with
 * {@code StringUtils.splitPreserveAllTokens(line, sep)}, so empty tokens
 * between adjacent separators are kept as (blank) cells.
 * <p>
 * NOTE(review): per the commons-lang documentation the second argument is a
 * set of separator characters, not one multi-character delimiter - confirm
 * that callers only pass single-character separators.
 */
public class SeparatorRowParser extends RowParser {
    String sep;

    /**
     * @param sep separator character(s) handed to commons-lang when splitting
     */
    public SeparatorRowParser(String sep) {
        this.sep = sep;
    }

    /**
     * Splits a line into its raw text tokens.
     *
     * @param line       the raw line of text
     * @param lineReader not used by this implementation
     * @return a new mutable list with one entry per token, in order
     */
    public List<String> split(String line, LineNumberReader lineReader) {
        List<String> tokens = new ArrayList<String>();
        for (String token : StringUtils.splitPreserveAllTokens(line, sep)) {
            tokens.add(token);
        }
        return tokens;
    }

    /**
     * Splits a line and appends one cell per token to {@code row}.
     *
     * @param row            the row whose {@code cells} list receives the cells
     * @param line           the raw line of text
     * @param guessValueType when true each token is converted through
     *                       {@code ImporterUtilities.parseCellValue}; otherwise
     *                       the token text is stored as is
     * @param lineReader     not used by this implementation
     * @return true if at least one token produced non-blank data
     */
    public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) {
        boolean foundData = false;

        for (String text : StringUtils.splitPreserveAllTokens(line, sep)) {
            Serializable value = guessValueType ? ImporterUtilities.parseCellValue(text) : text;

            if (!ExpressionUtils.isNonBlankData(value)) {
                // Blank token: add a null placeholder so later cells keep their column.
                row.cells.add(null);
                continue;
            }

            row.cells.add(new Cell(value, null));
            foundData = true;
        }

        return foundData;
    }
}

View File

@ -1,160 +0,0 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers.parsers;
import java.io.InputStream;
import javax.servlet.ServletException;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * {@link TreeParser} implementation that reads XML through a StAX
 * {@link XMLStreamReader} and translates StAX event codes into
 * {@link TreeParserToken}s.
 */
public class XmlParser implements TreeParser{
    final static Logger logger = LoggerFactory.getLogger("XmlParser");

    XMLStreamReader parser = null;

    /**
     * Creates a stream reader over the given input.
     * <p>
     * The factory is configured to coalesce adjacent character data and to
     * replace entity references. Resolution of external entities is switched
     * off because imported files are untrusted input (XXE hardening).
     * <p>
     * Construction failures are logged rather than thrown, so the constructor
     * keeps its original no-throw contract; in that case {@code parser} stays
     * {@code null} and the instance is not usable.
     *
     * @param inputStream the XML document to read
     */
    public XmlParser(InputStream inputStream){
        try {
            XMLInputFactory factory = XMLInputFactory.newInstance();
            factory.setProperty(XMLInputFactory.IS_COALESCING, true);
            factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true);
            // Never fetch external entities referenced by the document being imported.
            factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
            parser = factory.createXMLStreamReader(inputStream);
        } catch (XMLStreamException e) {
            logger.error("Unable to create XML stream reader", e);
        } catch (FactoryConfigurationError e) {
            logger.error("Unable to obtain an XML input factory", e);
        }
    }

    /**
     * Advances to the next event and returns its token.
     *
     * @return the token for the event the reader moved to
     * @throws ServletException if the stream is exhausted or the underlying
     *         reader reports an {@link XMLStreamException}
     */
    @Override
    public TreeParserToken next() throws ServletException{
        try {
            if (!parser.hasNext()) {
                throw new ServletException("End of XML stream");
            }
            return mapToTreeParserToken(parser.next());
        } catch (XMLStreamException e) {
            throw new ServletException(e);
        }
    }

    /**
     * Maps a StAX event code to a tree parser token.
     * <p>
     * Element start/end become entity start/end, CHARACTERS and SPACE become
     * values, and every other event (documents, processing instructions,
     * notation declarations, namespaces, entity references, DTDs, comments,
     * CDATA, attributes, unknown codes) is ignorable.
     *
     * @param token an {@link XMLStreamConstants} event code
     * @return the corresponding token, never null
     */
    protected TreeParserToken mapToTreeParserToken(int token) throws ServletException {
        switch (token) {
            case XMLStreamConstants.START_ELEMENT:
                return TreeParserToken.StartEntity;
            case XMLStreamConstants.END_ELEMENT:
                return TreeParserToken.EndEntity;
            case XMLStreamConstants.CHARACTERS:
            case XMLStreamConstants.SPACE:
                return TreeParserToken.Value;
            default:
                return TreeParserToken.Ignorable;
        }
    }

    /** @return the token for the event the reader is currently positioned on */
    @Override
    public TreeParserToken getEventType() throws ServletException{
        return this.mapToTreeParserToken(parser.getEventType());
    }

    /**
     * @return true if the reader has more events
     * @throws ServletException wrapping any {@link XMLStreamException}
     */
    @Override
    public boolean hasNext() throws ServletException{
        try {
            return parser.hasNext();
        } catch (XMLStreamException e) {
            throw new ServletException(e);
        }
    }

    /**
     * @return the local name of the current event, or null when the reader
     *         rejects the call with an {@link IllegalStateException}
     */
    @Override
    public String getLocalName() throws ServletException{
        try{
            return parser.getLocalName();
        }catch(IllegalStateException e){
            return null;
        }
    }

    /** @return the prefix reported by the reader for the current event */
    @Override
    public String getPrefix(){
        return parser.getPrefix();
    }

    /** @return the text reported by the reader for the current event */
    @Override
    public String getText(){
        return parser.getText();
    }

    /** @return the attribute count reported by the reader for the current event */
    @Override
    public int getAttributeCount(){
        return parser.getAttributeCount();
    }

    /** @return the value of the attribute at {@code index} */
    @Override
    public String getAttributeValue(int index){
        return parser.getAttributeValue(index);
    }

    /** @return the prefix of the attribute at {@code index} */
    @Override
    public String getAttributePrefix(int index){
        return parser.getAttributePrefix(index);
    }

    /** @return the local name of the attribute at {@code index} */
    @Override
    public String getAttributeLocalName(int index){
        return parser.getAttributeLocalName(index);
    }
}

View File

@ -0,0 +1,23 @@
package com.google.refine.importers.tree;
/**
 * A column is used to describe a branch-terminating element in a tree
 * structure.
 */
public class ImportColumn extends ImportVertical {
    public int cellIndex;
    public int nextRowIndex;
    public boolean blankOnFirstRow;

    /** Creates a column that keeps the inherited default name. */
    public ImportColumn() {
        super();
    }

    /**
     * Creates a column with the given name (required for testing).
     *
     * @param name value stored in the inherited {@code name} field
     */
    public ImportColumn(String name) {
        this.name = name;
    }

    /** Intentionally a no-op: the tabulation for columns is done elsewhere. */
    @Override
    void tabulate() {
    }
}

View File

@ -0,0 +1,33 @@
package com.google.refine.importers.tree;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
/**
 * A column group describes a branch in tree structured data. It owns a map of
 * leaf columns and a map of nested groups, both keyed by String.
 */
public class ImportColumnGroup extends ImportVertical {
    public Map<String, ImportColumnGroup> subgroups = new HashMap<String, ImportColumnGroup>();
    public Map<String, ImportColumn> columns = new HashMap<String, ImportColumn>();
    public int nextRowIndex;

    /**
     * Tabulates every column and then, recursively, every subgroup, raising
     * this group's {@code nonBlankCount} to the largest count seen among them
     * (it is never lowered).
     */
    @Override
    void tabulate() {
        for (ImportColumn column : columns.values()) {
            column.tabulate();
            if (column.nonBlankCount > nonBlankCount) {
                nonBlankCount = column.nonBlankCount;
            }
        }

        for (ImportColumnGroup group : subgroups.values()) {
            group.tabulate();
            if (group.nonBlankCount > nonBlankCount) {
                nonBlankCount = group.nonBlankCount;
            }
        }
    }

    /** @return the group name followed by the comma-joined column and subgroup keys */
    @Override
    public String toString() {
        String columnKeys = StringUtils.join(columns.keySet(), ',');
        String subgroupKeys = StringUtils.join(subgroups.keySet(), ',');
        return String.format("name=%s, columns={%s}, subgroups={{%s}}",
                name, columnKeys, subgroupKeys);
    }
}

View File

@ -0,0 +1,14 @@
package com.google.refine.importers.tree;
import java.util.LinkedList;
import java.util.List;
import com.google.refine.model.Cell;
/**
 * A record describes a data element in a tree-structure.
 * <p>
 * It is a plain holder with no behaviour of its own: it only exposes the
 * {@link #rows} list.
 */
public class ImportRecord {
// List of cell lists (going by the field name, one inner list per row); starts out empty.
public List<List<Cell>> rows = new LinkedList<List<Cell>>();
}

View File

@ -0,0 +1,8 @@
package com.google.refine.importers.tree;
/**
 * Package-private base class for the import tree nodes in this package
 * (extended by ImportColumn and ImportColumnGroup).
 */
abstract class ImportVertical {
// Name of this node; defaults to the empty string.
public String name = "";
// Presumably the number of non-blank cells counted for this node - TODO confirm against the code that increments it.
public int nonBlankCount;
// Subclasses decide how (or whether) nonBlankCount is aggregated.
abstract void tabulate();
}

View File

@ -0,0 +1,16 @@
package com.google.refine.importers.tree;
import java.util.Arrays;
/**
 * An element which holds sub-elements we
 * shall import as records.
 */
class RecordElementCandidate {
    /** Path of this candidate element, one array entry per path segment. */
    String[] path;

    /** Presumably how often the candidate occurred - TODO confirm against callers. */
    int count;

    /** @return the path rendered by {@link Arrays#toString(Object[])} */
    @Override
    public String toString() {
        final String renderedPath = Arrays.toString(path);
        return renderedPath;
    }
}

View File

@ -31,22 +31,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers;
package com.google.refine.importers.tree;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.importers.ImporterUtilities;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
@ -54,83 +50,6 @@ import com.google.refine.model.Project;
public abstract class TreeImportUtilities {
final static Logger logger = LoggerFactory.getLogger("TreeImportUtilities");
/**
 * An element which holds sub-elements we
 * shall import as records.
 */
static protected class RecordElementCandidate {
// Path of this candidate element, one array entry per path segment.
String[] path;
// Presumably how often the candidate occurred - TODO confirm against callers.
int count;
// Renders only the path, via Arrays.toString.
public String toString() {
return Arrays.toString(path);
}
}
/**
 * Base class for the import tree nodes declared in this class
 * (extended by ImportColumnGroup and ImportColumn).
 */
static protected abstract class ImportVertical {
// Name of this node; defaults to the empty string.
public String name = "";
// Presumably the number of non-blank cells counted for this node - TODO confirm.
public int nonBlankCount;
// Subclasses decide how (or whether) nonBlankCount is aggregated.
abstract void tabulate();
}
/**
 * A column group describes a branch in tree structured data. It owns a map of
 * leaf columns and a map of nested groups, both keyed by String.
 */
static public class ImportColumnGroup extends ImportVertical {
public Map<String, ImportColumnGroup> subgroups = new HashMap<String, ImportColumnGroup>();
public Map<String, ImportColumn> columns = new HashMap<String, ImportColumn>();
public int nextRowIndex;
// Tabulates each column and then, recursively, each subgroup; nonBlankCount
// is raised to the largest child count and is never lowered.
@Override
void tabulate() {
for (ImportColumn c : columns.values()) {
c.tabulate();
nonBlankCount = Math.max(nonBlankCount, c.nonBlankCount);
}
for (ImportColumnGroup g : subgroups.values()) {
g.tabulate();
nonBlankCount = Math.max(nonBlankCount, g.nonBlankCount);
}
}
// Shows the group name plus the comma-joined keys of both maps.
public String toString() {
return String.format("name=%s, columns={%s}, subgroups={{%s}}",
name,StringUtils.join(columns.keySet(), ','),
StringUtils.join(subgroups.keySet(),','));
}
}
/**
 * A column is used to describe a branch-terminating element in a tree
 * structure.
 */
static public class ImportColumn extends ImportVertical {
public int cellIndex;
public int nextRowIndex;
public boolean blankOnFirstRow;
// Keeps the inherited default name.
public ImportColumn() {}
public ImportColumn(String name) { //required for testing
super.name = name;
}
// Intentionally empty for columns.
@Override
void tabulate() {
// already done the tabulation elsewhere
}
}
/**
 * A record describes a data element in a tree-structure.
 * It is a plain holder that only exposes the rows list.
 */
static public class ImportRecord {
// List of cell lists (going by the field name, one inner list per row); starts out empty.
public List<List<Cell>> rows = new LinkedList<List<Cell>>();
}
static protected void sortRecordElementCandidates(List<RecordElementCandidate> list) {
Collections.sort(list, new Comparator<RecordElementCandidate>() {
public int compare(RecordElementCandidate o1, RecordElementCandidate o2) {

View File

@ -0,0 +1,169 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers.tree;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.List;
import org.apache.commons.lang.NotImplementedException;
import org.json.JSONObject;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.ImporterUtilities;
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingParser;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
/**
 * Base class for importing parsers that read tree-structured data.
 * <p>
 * Subclasses choose, through the constructor flag, whether they want to be fed
 * an {@link InputStream} or a {@link Reader}, and override the matching
 * {@code parseOneFile} overload; the overloads provided here throw
 * {@link NotImplementedException}.
 */
abstract public class TreeImportingParserBase implements ImportingParser {
    final protected boolean useInputStream;

    /**
     * @param useInputStream true to hand each file to the {@link InputStream}
     *                       overload of {@code parseOneFile}, false to hand it
     *                       to the {@link Reader} overload
     */
    protected TreeImportingParserBase(boolean useInputStream) {
        this.useInputStream = useInputStream;
    }

    /**
     * @return a new, empty options object; this base class contributes no
     *         initialization data
     */
    @Override
    public JSONObject createParserUIInitializationData(ImportingJob job,
            List<JSONObject> fileRecords, String format) {
        return new JSONObject();
    }

    /**
     * Parses every file record in order into {@code project}, then creates
     * the project's columns from the accumulated root column group and
     * updates the column model.
     * <p>
     * An {@link IOException} from one file is appended to {@code exceptions}
     * and parsing continues with the next file. When {@code limit} is positive,
     * no further files are read once the project holds at least that many rows.
     */
    @Override
    public void parse(Project project, ProjectMetadata metadata,
            ImportingJob job, List<JSONObject> fileRecords, String format,
            int limit, JSONObject options, List<Exception> exceptions) {

        MultiFileReadingProgress progress = ImporterUtilities.createMultiFileReadingProgress(job, fileRecords);
        ImportColumnGroup rootColumnGroup = new ImportColumnGroup();

        for (JSONObject fileRecord : fileRecords) {
            try {
                parseOneFile(project, metadata, job, fileRecord, rootColumnGroup, limit, options, exceptions, progress);
            } catch (IOException e) {
                exceptions.add(e);
            }

            if (limit > 0 && project.rows.size() >= limit) {
                break;
            }
        }

        XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
        project.columnModel.update();
    }

    /**
     * Resolves one file record to a file, opens it and dispatches to the
     * stream or reader overload according to {@code useInputStream}.
     * <p>
     * Both the tracked input stream and, in reader mode, the reader are closed
     * before returning; the progress object is told that the file ended even
     * when parsing fails.
     *
     * @throws IOException if opening, reading or closing the file fails
     */
    public void parseOneFile(
        Project project,
        ProjectMetadata metadata,
        ImportingJob job,
        JSONObject fileRecord,
        ImportColumnGroup rootColumnGroup,
        int limit,
        JSONObject options,
        List<Exception> exceptions,
        final MultiFileReadingProgress progress
    ) throws IOException {
        final File file = ImportingUtilities.getFile(job, fileRecord);
        final String fileSource = ImportingUtilities.getFileSource(fileRecord);

        progress.startFile(fileSource);
        try {
            InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
            try {
                if (useInputStream) {
                    parseOneFile(project, metadata, job, fileSource, inputStream,
                            rootColumnGroup, limit, options, exceptions);
                } else {
                    // NOTE(review): the reader is opened on the file separately from the
                    // tracked stream above, so progress tracking presumably sees no reads
                    // in this mode - confirm.
                    Reader reader = ImportingUtilities.getFileReader(file, fileRecord);
                    try {
                        parseOneFile(project, metadata, job, fileSource, reader,
                                rootColumnGroup, limit, options, exceptions);
                    } finally {
                        // Previously leaked; closing twice is harmless if a subclass
                        // already closed it.
                        if (reader != null) {
                            reader.close();
                        }
                    }
                }
            } finally {
                inputStream.close();
            }
        } finally {
            progress.endFile(fileSource, file.length());
        }
    }

    /**
     * Reader-based hook; subclasses constructed with
     * {@code useInputStream == false} must override it.
     *
     * @throws NotImplementedException always, in this base implementation
     */
    public void parseOneFile(
        Project project,
        ProjectMetadata metadata,
        ImportingJob job,
        String fileSource,
        Reader reader,
        ImportColumnGroup rootColumnGroup,
        int limit,
        JSONObject options,
        List<Exception> exceptions
    ) {
        throw new NotImplementedException();
    }

    /**
     * Stream-based hook; subclasses constructed with
     * {@code useInputStream == true} must override it.
     *
     * @throws NotImplementedException always, in this base implementation
     */
    public void parseOneFile(
        Project project,
        ProjectMetadata metadata,
        ImportingJob job,
        String fileSource,
        InputStream inputStream,
        ImportColumnGroup rootColumnGroup,
        int limit,
        JSONObject options,
        List<Exception> exceptions
    ) {
        throw new NotImplementedException();
    }

    /**
     * Imports tree data from an already constructed {@link TreeReader}, using
     * the string array stored under the {@code "recordPath"} option as the
     * record path.
     */
    protected void parseOneFile(
        Project project,
        ProjectMetadata metadata,
        ImportingJob job,
        String fileSource,
        TreeReader treeParser,
        ImportColumnGroup rootColumnGroup,
        int limit,
        JSONObject options,
        List<Exception> exceptions
    ) {
        String[] recordPath = JSONUtilities.getStringArray(options, "recordPath");
        XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit);
    }
}

View File

@ -31,17 +31,26 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers.parsers;
package com.google.refine.importers.tree;
import javax.servlet.ServletException;
public interface TreeReader {
public enum Token {
Ignorable,
StartEntity,
EndEntity,
Value
//append additional tokens only if necessary (most should be just mapped to Value or Ignorable)
}
public interface TreeParser {
public TreeParserToken next() throws ServletException;
public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken
public boolean hasNext() throws ServletException;
public String getLocalName() throws ServletException; //aka getFieldName
public Token current() throws Exception; //aka getCurrentToken
public boolean hasNext() throws Exception;
public Token next() throws Exception;
public String getFieldName() throws Exception; //aka getFieldName
public String getPrefix();
public String getText() throws ServletException;
public String getFieldValue() throws Exception;
public int getAttributeCount();
public String getAttributeValue(int index);
public String getAttributePrefix(int index);

View File

@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers;
package com.google.refine.importers.tree;
import java.util.ArrayList;
import java.util.HashMap;
@ -40,13 +40,10 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import javax.servlet.ServletException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.TreeParserToken;
import com.google.refine.importers.tree.TreeReader.Token;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
@ -54,11 +51,11 @@ import com.google.refine.model.Row;
public class XmlImportUtilities extends TreeImportUtilities {
final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities");
static public String[] detectPathFromTag(TreeParser parser, String tag) {
static public String[] detectPathFromTag(TreeReader parser, String tag) {
try {
while (parser.hasNext()) {
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
List<String> path = detectRecordElement(parser, tag);
if (path != null) {
String[] path2 = new String[path.size()];
@ -90,14 +87,14 @@ public class XmlImportUtilities extends TreeImportUtilities {
* null if the tag is not found.
* @throws ServletException
*/
static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
static protected List<String> detectRecordElement(TreeReader parser, String tag) throws Exception {
try{
if(parser.getEventType() == TreeParserToken.Ignorable)//XMLStreamConstants.START_DOCUMENT)
if(parser.current() == Token.Ignorable)//XMLStreamConstants.START_DOCUMENT)
parser.next();
String localName = parser.getLocalName();
String localName = parser.getFieldName();
String fullName = composeName(parser.getPrefix(), localName);
if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) {
if (tag.equals(parser.getFieldName()) || tag.equals(fullName)) {
List<String> path = new LinkedList<String>();
path.add(localName);
@ -105,10 +102,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
while (parser.hasNext()) {
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
Token eventType = parser.next();
if (eventType == Token.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
break;
} else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
} else if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
List<String> path = detectRecordElement(parser, tag);
if (path != null) {
path.add(0, localName);
@ -116,7 +113,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
}
}
}catch(ServletException e){
} catch (Exception e) {
// silent
// e.printStackTrace();
}
@ -136,18 +133,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
* The path to the most numerous of the possible candidates.
* null if no candidates were found (less than 6 recurrences)
*/
static public String[] detectRecordElement(TreeParser parser) {
static public String[] detectRecordElement(TreeReader parser) {
logger.trace("detectRecordElement(inputStream)");
List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
try {
while (parser.hasNext()) {
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
RecordElementCandidate candidate =
detectRecordElement(
parser,
new String[] { parser.getLocalName() });
new String[] { parser.getFieldName() });
if (candidate != null) {
candidates.add(candidate);
@ -168,8 +165,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
return null;
}
static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) {
logger.trace("detectRecordElement(TreeParser, String[])");
static protected RecordElementCandidate detectRecordElement(TreeReader parser, String[] path) {
logger.trace("detectRecordElement(TreeReader, String[])");
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
@ -178,21 +175,21 @@ public class XmlImportUtilities extends TreeImportUtilities {
try {
while (parser.hasNext()) {
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.EndEntity ) {
Token eventType = parser.next();
if (eventType == Token.EndEntity ) {
break;
} else if (eventType == TreeParserToken.Value) {
} else if (eventType == Token.Value) {
try{
if (parser.getText().trim().length() > 0) {
if (parser.getFieldValue().trim().length() > 0) {
textNodeCount++;
}
}catch(Exception e){
//silent
}
} else if (eventType == TreeParserToken.StartEntity) {
} else if (eventType == Token.StartEntity) {
childElementNodeCount++;
String tagName = parser.getLocalName();
String tagName = parser.getFieldName();
immediateChildCandidateMap.put(
tagName,
@ -261,17 +258,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
static public void importTreeData(
TreeParser parser,
TreeReader parser,
Project project,
String[] recordPath,
ImportColumnGroup rootColumnGroup
ImportColumnGroup rootColumnGroup,
int limit
) {
logger.trace("importTreeData(TreeParser, Project, String[], ImportColumnGroup)");
logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)");
try {
while (parser.hasNext()) {
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {
findRecord(project, parser, recordPath, 0, rootColumnGroup);
while (parser.hasNext() && (limit <= 0 || project.rows.size() < limit)) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit);
}
}
} catch (Exception e) {
@ -292,26 +290,30 @@ public class XmlImportUtilities extends TreeImportUtilities {
*/
static protected void findRecord(
Project project,
TreeParser parser,
TreeReader parser,
String[] recordPath,
int pathIndex,
ImportColumnGroup rootColumnGroup
) throws ServletException {
logger.trace("findRecord(Project, TreeParser, String[], int, ImportColumnGroup");
ImportColumnGroup rootColumnGroup,
int limit
) throws Exception {
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup");
if(parser.getEventType() == TreeParserToken.Ignorable){//XMLStreamConstants.START_DOCUMENT){
if(parser.current() == Token.Ignorable){//XMLStreamConstants.START_DOCUMENT){
logger.warn("Cannot use findRecord method for START_DOCUMENT event");
return;
}
String tagName = parser.getLocalName();
if (tagName.equals(recordPath[pathIndex])) {
String recordPathSegment = recordPath[pathIndex];
String localName = parser.getFieldName();
String fullName = composeName(parser.getPrefix(), localName);
if (recordPathSegment.equals(localName) || recordPathSegment.equals(fullName)) {
if (pathIndex < recordPath.length - 1) {
while (parser.hasNext()) {
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup);
} else if (eventType == TreeParserToken.EndEntity ) {
while (parser.hasNext() && (limit <= 0 || project.rows.size() < limit)) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit);
} else if (eventType == Token.EndEntity ) {
break;
}
}
@ -323,12 +325,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
}
static protected void skip(TreeParser parser) throws ServletException {
static protected void skip(TreeReader parser) throws Exception {
while (parser.hasNext()) {
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
skip(parser);
} else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
} else if (eventType == Token.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
return;
}
}
@ -344,10 +346,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
*/
static protected void processRecord(
Project project,
TreeParser parser,
TreeReader parser,
ImportColumnGroup rootColumnGroup
) throws ServletException {
logger.trace("processRecord(Project,TreeParser,ImportColumnGroup)");
) throws Exception {
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)");
ImportRecord record = new ImportRecord();
processSubRecord(project, parser, rootColumnGroup, record);
@ -382,19 +384,19 @@ public class XmlImportUtilities extends TreeImportUtilities {
*/
static protected void processSubRecord(
Project project,
TreeParser parser,
TreeReader parser,
ImportColumnGroup columnGroup,
ImportRecord record
) throws ServletException {
logger.trace("processSubRecord(Project,TreeParser,ImportColumnGroup,ImportRecord)");
) throws Exception {
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord)");
if(parser.getEventType() == TreeParserToken.Ignorable)
if(parser.current() == Token.Ignorable)
return;
ImportColumnGroup thisColumnGroup = getColumnGroup(
project,
columnGroup,
composeName(parser.getPrefix(), parser.getLocalName()));
composeName(parser.getPrefix(), parser.getFieldName()));
thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex);
@ -413,8 +415,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
while (parser.hasNext()) {
TreeParserToken eventType = parser.next();
if (eventType == TreeParserToken.StartEntity) {
Token eventType = parser.next();
if (eventType == Token.StartEntity) {
processSubRecord(
project,
parser,
@ -422,9 +424,9 @@ public class XmlImportUtilities extends TreeImportUtilities {
record
);
} else if (//eventType == XMLStreamConstants.CDATA ||
eventType == TreeParserToken.Value) { //XMLStreamConstants.CHARACTERS) {
String text = parser.getText();
String colName = parser.getLocalName();
eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) {
String text = parser.getFieldValue();
String colName = parser.getFieldName();
if(text != null){
text = text.trim();
if (text.length() > 0) {
@ -437,7 +439,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
);
}
}
} else if (eventType == TreeParserToken.EndEntity) {
} else if (eventType == Token.EndEntity) {
break;
}
}
@ -451,8 +453,4 @@ public class XmlImportUtilities extends TreeImportUtilities {
}
thisColumnGroup.nextRowIndex = nextRowIndex;
}
}

View File

@ -0,0 +1,264 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importing;
import java.io.IOException;
import java.io.Writer;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.RefineServlet;
import com.google.refine.commands.HttpUtilities;
import com.google.refine.importing.ImportingManager.Format;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
/**
 * Default controller for the importing workflow.
 *
 * Every POST request names the step to perform through the "subCommand" URL
 * parameter and the {@link ImportingJob} to perform it on through the "jobID"
 * URL parameter. Failures that the client can act on (unknown job, job in the
 * wrong state, unknown sub command) are reported through
 * {@link HttpUtilities#respond} with the code "error".
 */
public class DefaultImportingController implements ImportingController {

    protected RefineServlet servlet;

    @Override
    public void init(RefineServlet servlet) {
        this.servlet = servlet;
    }

    @Override
    public void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
        // GET requests are not handled by this controller; nothing is written to the response.
    }

    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {

        /*
         * The uploaded file is in the POST body as a "file part". If
         * we call request.getParameter() then the POST body will get
         * read and we won't have a chance to parse the body ourselves.
         * This is why we have to parse the URL for parameters ourselves.
         */
        Properties parameters = ParsingUtilities.parseUrlParameters(request);
        String subCommand = parameters.getProperty("subCommand");
        if ("load-raw-data".equals(subCommand)) {
            doLoadRawData(request, response, parameters);
        } else if ("update-file-selection".equals(subCommand)) {
            doUpdateFileSelection(request, response, parameters);
        } else if ("initialize-parser-ui".equals(subCommand)) {
            doInitializeParserUI(request, response, parameters);
        } else if ("update-format-and-options".equals(subCommand)) {
            doUpdateFormatAndOptions(request, response, parameters);
        } else if ("create-project".equals(subCommand)) {
            doCreateProject(request, response, parameters);
        } else {
            HttpUtilities.respond(response, "error", "No such sub command");
        }
    }

    /**
     * Resolves the job named by the "jobID" URL parameter.
     *
     * A missing or non-numeric ID is treated the same as an ID that names no
     * job: an "error" reply is sent and null is returned, so callers only
     * have to return when they get null.
     *
     * @param response response used to report the failure
     * @param parameters URL parameters of the request
     * @return the job, or null if none could be found (the reply has then already been sent)
     */
    private ImportingJob findJob(HttpServletResponse response, Properties parameters)
        throws ServletException, IOException {

        ImportingJob job = null;
        String jobIDString = parameters.getProperty("jobID");
        if (jobIDString != null) {
            try {
                job = ImportingManager.getJob(Long.parseLong(jobIDString));
            } catch (NumberFormatException e) {
                // A malformed ID cannot name an existing job; report it like an unknown one below.
            }
        }

        if (job == null) {
            HttpUtilities.respond(response, "error", "No such import job");
        }
        return job;
    }

    /**
     * Checks that the job's config is in the "ready" state, replying with an
     * "error" if it is not.
     *
     * @return true if the job is ready; false if an error reply has been sent
     */
    private boolean isJobReady(HttpServletResponse response, ImportingJob job)
        throws ServletException, IOException, JSONException {

        if ("ready".equals(getConfig(job).getString("state"))) {
            return true;
        }
        HttpUtilities.respond(response, "error", "Job not ready");
        return false;
    }

    /**
     * Handles "load-raw-data": only allowed while the job is still in the
     * "new" state. On success this method itself writes no reply; the
     * response object is handed to ImportingUtilities.loadDataAndPrepareJob.
     */
    private void doLoadRawData(HttpServletRequest request, HttpServletResponse response, Properties parameters)
        throws ServletException, IOException {

        ImportingJob job = findJob(response, parameters);
        if (job == null) {
            return;
        }

        try {
            final JSONObject config = getConfig(job);
            if (!("new".equals(config.getString("state")))) {
                HttpUtilities.respond(response, "error", "Job already started; cannot load more data");
                return;
            }

            ImportingUtilities.loadDataAndPrepareJob(
                request, response, parameters, job, config);
        } catch (JSONException e) {
            throw new ServletException(e);
        }
    }

    /**
     * Handles "update-file-selection": stores the new selection (the
     * "fileSelection" request parameter, a JSON array) on a ready job and
     * replies with the updated job data.
     */
    private void doUpdateFileSelection(HttpServletRequest request, HttpServletResponse response, Properties parameters)
        throws ServletException, IOException {

        ImportingJob job = findJob(response, parameters);
        if (job == null) {
            return;
        }

        try {
            if (!isJobReady(response, job)) {
                return;
            }

            JSONArray fileSelectionArray = ParsingUtilities.evaluateJsonStringToArray(
                    request.getParameter("fileSelection"));

            ImportingUtilities.updateJobWithNewFileSelection(job, fileSelectionArray);

            replyWithJobData(request, response, job);
        } catch (JSONException e) {
            throw new ServletException(e);
        }
    }

    /**
     * Handles "update-format-and-options": runs a preview parse of a ready
     * job with the "format" and "options" request parameters.
     */
    private void doUpdateFormatAndOptions(HttpServletRequest request, HttpServletResponse response, Properties parameters)
        throws ServletException, IOException {

        ImportingJob job = findJob(response, parameters);
        if (job == null) {
            return;
        }

        try {
            if (!isJobReady(response, job)) {
                return;
            }

            String format = request.getParameter("format");
            JSONObject optionObj = ParsingUtilities.evaluateJsonStringToObject(
                    request.getParameter("options"));

            // NOTE(review): exceptions collected by the parser are not reported to the
            // client; "ok" is sent regardless. Confirm whether that is intended.
            List<Exception> exceptions = new LinkedList<Exception>();

            ImportingUtilities.previewParse(job, format, optionObj, exceptions);

            HttpUtilities.respond(response, "ok", "done");
        } catch (JSONException e) {
            throw new ServletException(e);
        }
    }

    /**
     * Handles "initialize-parser-ui": asks the parser registered for the
     * requested format for the data its client-side UI needs and replies
     * with it under the "options" key.
     */
    private void doInitializeParserUI(HttpServletRequest request, HttpServletResponse response, Properties parameters)
        throws ServletException, IOException {

        ImportingJob job = findJob(response, parameters);
        if (job == null) {
            return;
        }

        String format = request.getParameter("format");
        Format formatRecord = ImportingManager.formatToRecord.get(format);
        if (formatRecord != null && formatRecord.parser != null) {
            JSONObject options = formatRecord.parser.createParserUIInitializationData(
                    job, ImportingUtilities.getSelectedFileRecords(job), format);
            JSONObject result = new JSONObject();
            JSONUtilities.safePut(result, "status", "ok");
            JSONUtilities.safePut(result, "options", options);

            HttpUtilities.respond(response, result.toString());
        } else {
            HttpUtilities.respond(response, "error", "Unrecognized format or format has no parser");
        }
    }

    /**
     * Handles "create-project": creates the project from a ready job using
     * the "format" and "options" request parameters.
     */
    private void doCreateProject(HttpServletRequest request, HttpServletResponse response, Properties parameters)
        throws ServletException, IOException {

        ImportingJob job = findJob(response, parameters);
        if (job == null) {
            return;
        }

        try {
            if (!isJobReady(response, job)) {
                return;
            }

            String format = request.getParameter("format");
            JSONObject optionObj = ParsingUtilities.evaluateJsonStringToObject(
                    request.getParameter("options"));

            // NOTE(review): as with the preview parse, collected exceptions are currently
            // not surfaced to the client.
            List<Exception> exceptions = new LinkedList<Exception>();

            ImportingUtilities.createProject(job, format, optionObj, exceptions);

            HttpUtilities.respond(response, "ok", "done");
        } catch (JSONException e) {
            throw new ServletException(e);
        }
    }

    /**
     * Returns the job's config, lazily creating one in the "new" state
     * without data the first time it is asked for.
     */
    private JSONObject getConfig(ImportingJob job) {
        if (job.config == null) {
            job.config = new JSONObject();
            JSONUtilities.safePut(job.config, "state", "new");
            JSONUtilities.safePut(job.config, "hasData", false);
        }
        return job.config;
    }

    /**
     * Writes {"code": "ok", "job": ...} to the response and closes its writer.
     */
    private void replyWithJobData(HttpServletRequest request, HttpServletResponse response, ImportingJob job)
        throws ServletException, IOException {

        Writer w = response.getWriter();
        JSONWriter writer = new JSONWriter(w);
        try {
            writer.object();
            writer.key("code"); writer.value("ok");
            writer.key("job"); job.write(writer, new Properties());
            writer.endObject();
        } catch (JSONException e) {
            throw new ServletException(e);
        } finally {
            w.flush();
            w.close();
        }
    }
}

View File

@ -1,6 +1,6 @@
/*
Copyright 2010, Google Inc.
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -31,13 +31,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers.parsers;
package com.google.refine.importing;
import java.io.File;
public enum TreeParserToken {
Ignorable,
StartEntity,
EndEntity,
Value
//append additional tokens only if necessary (most should be just mapped to Value or Ignorable)
/**
 * A pluggable guesser of a file's format. Guessers are registered per format
 * through ImportingManager.registerFormatGuesser.
 */
public interface FormatGuesser {
/**
 * Guesses the format of the given file.
 *
 * @param file the file to examine
 * @param encoding the encoding to assume for the file's content
 *        (NOTE(review): presumably may be null when unknown -- confirm with callers)
 * @param seedFormat presumably the format determined so far, which this
 *        guesser may refine -- TODO confirm against the callers
 * @return the guessed format; NOTE(review): the value used to signal
 *         "no guess" is not visible here -- confirm
 */
public String guess(File file, String encoding, String seedFormat);
}

View File

@ -1,6 +1,6 @@
/*
Copyright 2010, Google Inc.
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -31,17 +31,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers;
package com.google.refine.importing;
import com.google.refine.HttpResponder;
public interface Importer {
/**
* Determine whether importer can handle given contentType and filename.
*
* @param contentType
* @param fileName
* @return true if the importer can handle this
*/
public boolean canImportData(String contentType, String fileName);
/**
 * An {@link HttpResponder} that serves the requests of an importing workflow.
 * Controllers are registered under a module-qualified name through
 * ImportingManager.registerController, which also initializes them with the
 * servlet. The interface adds no methods of its own.
 */
public interface ImportingController extends HttpResponder {
}

View File

@ -0,0 +1,106 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importing;
import java.io.File;
import java.io.IOException;
import java.util.Properties;
import org.apache.commons.io.FileUtils;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.Jsonizable;
import com.google.refine.ProjectMetadata;
import com.google.refine.model.Project;
/**
 * The server-side state of one import job: its ID, its private working
 * directory, its JSON config, and the project (plus metadata) being built
 * from the imported data.
 */
public class ImportingJob implements Jsonizable {
    final public long id;
    final public File dir; // Temporary directory where the data about this job is stored

    public long lastTouched;
    public JSONObject config = null;

    public Project project;
    public ProjectMetadata metadata;

    public boolean canceled;

    /**
     * Creates the job and makes sure its working directory exists.
     *
     * @param id ID of the job
     * @param dir working directory of the job; created if missing
     */
    public ImportingJob(long id, File dir) {
        this.id = id;
        this.dir = dir;

        this.dir.mkdirs();
    }

    /** Records the current time as the moment this job was last used. */
    public void touch() {
        this.lastTouched = System.currentTimeMillis();
    }

    /**
     * Replaces the current project, if any, with a fresh project and fresh
     * metadata. The previous project is disposed first.
     */
    public void prepareNewProject() {
        if (this.project != null) {
            this.project.dispose();
        }

        this.project = new Project();
        this.metadata = new ProjectMetadata();
    }

    /**
     * Releases everything the job holds: disposes the project, drops the
     * metadata and deletes the working directory.
     */
    public void dispose() {
        if (this.project != null) {
            this.project.dispose();
            this.project = null;
        }
        this.metadata = null;

        try {
            FileUtils.deleteDirectory(this.dir);
        } catch (IOException e) {
            // A failure to delete the directory is ignored.
        }
    }

    /**
     * Returns the "raw-data" subdirectory of the working directory, creating
     * it if it does not exist yet.
     */
    public File getRawDataDir() {
        File rawDataDir = new File(this.dir, "raw-data");
        rawDataDir.mkdirs();

        return rawDataDir;
    }

    /**
     * Writes the job as a JSON object with a single "config" key.
     */
    @Override
    public void write(JSONWriter writer, Properties options)
        throws JSONException {

        writer.object();
        writer.key("config").value(config);
        writer.endObject();
    }
}

View File

@ -0,0 +1,257 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importing;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.json.JSONException;
import org.json.JSONWriter;
import com.google.refine.RefineServlet;
import edu.mit.simile.butterfly.ButterflyModule;
/**
 * Static registry for everything the importing machinery can be extended
 * with (formats and their parsers, format guessers, file extension and mime
 * type mappings, URL rewriters, controllers), plus the table of import jobs
 * that currently exist.
 */
public class ImportingManager {

    /** Immutable record describing one registered format. */
    static public class Format {
        final public String id;
        final public String label;
        final public boolean download;
        final public String uiClass;
        final public ImportingParser parser;

        private Format(
            String id,
            String label,
            boolean download,
            String uiClass,
            ImportingParser parser
        ) {
            this.id = id;
            this.label = label;
            this.download = download;
            this.uiClass = uiClass;
            this.parser = parser;
        }
    }

    static private RefineServlet servlet;
    static private File importDir;
    final static private Map<Long, ImportingJob> jobs = new HashMap<Long, ImportingJob>();

    // Mapping from format to its record (label, UI class, parser, ...),
    // e.g., "text" to the record labelled "Text files"
    final static public Map<String, Format> formatToRecord = new HashMap<String, Format>();

    // Mapping from format to guessers
    final static public Map<String, List<FormatGuesser>> formatToGuessers = new HashMap<String, List<FormatGuesser>>();

    // Mapping from file extension to format, e.g., ".xml" to "text/xml"
    final static public Map<String, String> extensionToFormat = new HashMap<String, String>();

    // Mapping from mime type to format, e.g., "application/json" to "text/json"
    final static public Map<String, String> mimeTypeToFormat = new HashMap<String, String>();

    // URL rewriters
    final static public Set<UrlRewriter> urlRewriters = new HashSet<UrlRewriter>();

    // Mapping from controller name to controller
    final static public Map<String, ImportingController> controllers = new HashMap<String, ImportingController>();

    /** Remembers the servlet; it is handed to controllers and supplies the temp directory. */
    static public void initialize(RefineServlet servlet) {
        ImportingManager.servlet = servlet;
    }

    /** Registers a format that has neither a UI class nor a parser. */
    static public void registerFormat(String format, String label) {
        registerFormat(format, label, null, null);
    }

    /** Registers a format whose {@code download} flag is true. */
    static public void registerFormat(String format, String label, String uiClass, ImportingParser parser) {
        registerFormat(format, label, true, uiClass, parser);
    }

    /**
     * Registers a format, replacing any earlier registration under the same ID.
     *
     * @param format ID of the format, e.g., "text/xml"
     * @param label human-readable label of the format
     * @param download value of the record's {@code download} flag
     * @param uiClass name of the client-side parser UI class; may be null
     * @param parser parser handling the format; may be null
     */
    static public void registerFormat(
            String format, String label, boolean download, String uiClass, ImportingParser parser) {
        formatToRecord.put(format, new Format(format, label, download, uiClass, parser));
    }

    /** Registers a guesser for a format, ahead of the guessers registered before it. */
    static public void registerFormatGuesser(String format, FormatGuesser guesser) {
        List<FormatGuesser> guessers = formatToGuessers.get(format);
        if (guessers == null) {
            guessers = new LinkedList<FormatGuesser>();
            formatToGuessers.put(format, guessers);
        }
        guessers.add(0, guesser); // prepend so that newer guessers take priority
    }

    /** Maps a file extension (with or without the leading dot) to a format. */
    static public void registerExtension(String extension, String format) {
        extensionToFormat.put(extension.startsWith(".") ? extension : ("." + extension), format);
    }

    /** Maps a mime type to a format. */
    static public void registerMimeType(String mimeType, String format) {
        mimeTypeToFormat.put(mimeType, format);
    }

    /** Adds a URL rewriter. */
    static public void registerUrlRewriter(UrlRewriter urlRewriter) {
        urlRewriters.add(urlRewriter);
    }

    /**
     * Registers a controller under "[module name]/[name]" and initializes it
     * with the servlet.
     */
    static public void registerController(ButterflyModule module, String name, ImportingController controller) {
        String key = module.getName() + "/" + name;
        controllers.put(key, controller);

        controller.init(servlet);
    }

    /**
     * Returns the directory under which job directories are created. On
     * first use the directory is emptied (if it already exists) and created.
     */
    static public File getImportDir() {
        if (importDir == null) {
            File tempDir = servlet.getTempDir();
            importDir = tempDir == null ? new File(".import-temp") : new File(tempDir, "import");

            if (importDir.exists()) {
                try {
                    // start fresh
                    FileUtils.deleteDirectory(importDir);
                } catch (IOException e) {
                    // Left-over data that cannot be deleted is tolerated; the directory is reused.
                }
            }
            importDir.mkdirs();
        }
        return importDir;
    }

    /**
     * Creates a new job with its own directory under the import directory.
     * IDs are derived from the current time plus a random offset; an ID that
     * is already in use is never handed out again, so an existing job cannot
     * be silently replaced.
     */
    static public ImportingJob createJob() {
        long id;
        do {
            id = System.currentTimeMillis() + (long) (Math.random() * 1000000);
        } while (jobs.containsKey(id));

        File jobDir = new File(getImportDir(), Long.toString(id));

        ImportingJob job = new ImportingJob(id, jobDir);
        jobs.put(id, job);

        return job;
    }

    /** Returns the job with the given ID, or null if there is none. */
    static public ImportingJob getJob(long id) {
        return jobs.get(id);
    }

    /** Disposes the job with the given ID and forgets it; does nothing if there is no such job. */
    static public void disposeJob(long id) {
        ImportingJob job = getJob(id);
        if (job != null) {
            job.dispose();
            jobs.remove(id);
        }
    }

    /**
     * Writes the registered formats and the mime type and extension mappings
     * as one JSON object with the keys "formats", "mimeTypeToFormat" and
     * "extensionToFormat".
     */
    static public void writeConfiguration(JSONWriter writer, Properties options) throws JSONException {
        writer.object();

        writer.key("formats");
        writer.object();
        for (Map.Entry<String, Format> entry : formatToRecord.entrySet()) {
            Format record = entry.getValue();

            writer.key(entry.getKey());
            writer.object();
            writer.key("id"); writer.value(record.id);
            writer.key("label"); writer.value(record.label);
            writer.key("download"); writer.value(record.download);
            writer.key("uiClass"); writer.value(record.uiClass);
            writer.endObject();
        }
        writer.endObject();

        writer.key("mimeTypeToFormat");
        writer.object();
        for (Map.Entry<String, String> entry : mimeTypeToFormat.entrySet()) {
            writer.key(entry.getKey());
            writer.value(entry.getValue());
        }
        writer.endObject();

        writer.key("extensionToFormat");
        writer.object();
        for (Map.Entry<String, String> entry : extensionToFormat.entrySet()) {
            writer.key(entry.getKey());
            writer.value(entry.getValue());
        }
        writer.endObject();

        writer.endObject();
    }

    /**
     * Looks up the format registered for a file name's extension. Candidate
     * extensions are tried from the first dot onwards, so for "a.tar.gz"
     * first ".tar.gz" and then ".gz" is looked up.
     *
     * @param fileName the file name; may be null
     * @return the format, or null if the name is null or no extension matches
     */
    static public String getFormatFromFileName(String fileName) {
        if (fileName == null) {
            return null;
        }

        for (int dot = fileName.indexOf('.'); dot >= 0; dot = fileName.indexOf('.', dot + 1)) {
            String format = extensionToFormat.get(fileName.substring(dot));
            if (format != null) {
                return format;
            }
        }
        return null;
    }

    /** Returns the format registered for the mime type, or null if there is none. */
    static public String getFormatFromMimeType(String mimeType) {
        return mimeTypeToFormat.get(mimeType);
    }

    /**
     * Determines a format from a file name and a mime type, either of which
     * may be null. When both yield a format, the file name's format wins
     * only if it starts with (i.e., refines) the mime type's format.
     *
     * @return the format, or null if neither input yields one
     */
    static public String getFormat(String fileName, String mimeType) {
        String fileNameFormat = getFormatFromFileName(fileName);
        String mimeTypeFormat = mimeType == null ? null : getFormatFromMimeType(mimeType);
        if (mimeTypeFormat == null) {
            return fileNameFormat;
        } else if (fileNameFormat == null) {
            return mimeTypeFormat;
        } else if (fileNameFormat.startsWith(mimeTypeFormat)) {
            // file name-based format is more specific
            return fileNameFormat;
        } else {
            return mimeTypeFormat;
        }
    }
}

View File

@ -1,6 +1,6 @@
/*
Copyright 2010, Google Inc.
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -31,33 +31,51 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers;
package com.google.refine.importing;
import java.io.Reader;
import java.util.Properties;
import java.util.List;
import org.json.JSONObject;
import com.google.refine.ProjectMetadata;
import com.google.refine.model.Project;
public interface ImportingParser {
/**
* Interface for importers which take a Reader as input.
* Create data sufficient for the parser UI on the client side to do its work.
* For example, an XML parser UI would need to know some sample elements so it
* can let the user pick the path to the record elements.
*
* @param job
* @param fileRecords
* @param format
* @return JSONObject options
*/
public interface ReaderImporter extends Importer {
public JSONObject createParserUIInitializationData(
ImportingJob job,
List<JSONObject> fileRecords,
String format
);
/**
* Read data from a input reader into project.
*
* @param reader
* reader to import data from. It is assumed to be positioned at
* the correct point and ready to go.
* @param project
* project which will contain data
* @param metadata
* metadata of new project
* @param options
* set of properties with import options
* @throws ImportException
* @param fileRecords
* @param format
* @param limit maximum number of rows to create
* @param options custom options put together by the UI corresponding to this parser,
* which the parser should understand
* @param exceptions
*/
public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options)
throws ImportException;
public void parse(
Project project,
ProjectMetadata metadata,
ImportingJob job,
List<JSONObject> fileRecords,
String format,
int limit,
JSONObject options,
List<Exception> exceptions
);
}

View File

@ -0,0 +1,895 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importing;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileUploadException;
import org.apache.commons.fileupload.ProgressListener;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.apache.commons.fileupload.util.Streams;
import org.apache.commons.io.FileCleaningTracker;
import org.apache.tools.bzip2.CBZip2InputStream;
import org.apache.tools.tar.TarEntry;
import org.apache.tools.tar.TarInputStream;
import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.importing.ImportingManager.Format;
import com.google.refine.importing.UrlRewriter.Result;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
import com.ibm.icu.text.NumberFormat;
public class ImportingUtilities {
final static protected Logger logger = LoggerFactory.getLogger("importing-utilities");
/**
 * Callback through which a long-running retrieval reports how far it has
 * come and learns whether it should stop.
 */
static public interface Progress {
/**
 * Reports progress.
 *
 * @param message description of the current step; callers in this class
 *        pass null when only the percentage changed
 * @param percent progress as a percentage; callers in this class pass -1
 *        before any size is known
 */
public void setProgress(String message, int percent);
/**
 * @return true if the work has been canceled and should stop
 */
public boolean isCanceled();
}
/**
 * Retrieves the raw data of a POST request into the job's raw data directory
 * and prepares the job's config for the next step.
 *
 * While the data is being retrieved the config is in state
 * "loading-raw-data" and carries a "progress" object ("message", "percent").
 * Afterwards files are auto-selected, formats are guessed and ranked, and
 * the config moves to state "ready" with "hasData" set. If retrieval fails,
 * the config is moved to state "error" (with an "error" message) before the
 * exception is propagated, so the job is never left looking as if it were
 * still loading.
 *
 * @param request the POST request carrying the data
 * @param response the response of the request (not used by this method)
 * @param parameters URL parameters of the request
 * @param job the job to load the data into
 * @param config the job's config
 * @throws ServletException if the upload cannot be parsed
 * @throws IOException if reading or saving the data fails
 */
static public void loadDataAndPrepareJob(
    HttpServletRequest request,
    HttpServletResponse response,
    Properties parameters,
    final ImportingJob job,
    JSONObject config) throws IOException, ServletException {

    JSONObject retrievalRecord = new JSONObject();
    JSONUtilities.safePut(config, "retrievalRecord", retrievalRecord);
    JSONUtilities.safePut(config, "state", "loading-raw-data");

    final JSONObject progress = new JSONObject();
    JSONUtilities.safePut(config, "progress", progress);
    try {
        ImportingUtilities.retrieveContentFromPostRequest(
            request,
            parameters,
            job.getRawDataDir(),
            retrievalRecord,
            new Progress() {
                @Override
                public void setProgress(String message, int percent) {
                    // A null message means "keep showing the previous one".
                    if (message != null) {
                        JSONUtilities.safePut(progress, "message", message);
                    }
                    JSONUtilities.safePut(progress, "percent", percent);
                }
                @Override
                public boolean isCanceled() {
                    return job.canceled;
                }
            }
        );
    } catch (FileUploadException e) {
        JSONUtilities.safePut(config, "state", "error");
        JSONUtilities.safePut(config, "error", "Error uploading data");

        throw new ServletException(e);
    } catch (IOException e) {
        // Without this the state would stay "loading-raw-data" forever.
        JSONUtilities.safePut(config, "state", "error");
        JSONUtilities.safePut(config, "error", "Error retrieving data");

        throw e;
    }

    JSONArray fileSelectionIndexes = new JSONArray();
    JSONUtilities.safePut(config, "fileSelection", fileSelectionIndexes);

    String bestFormat = ImportingUtilities.autoSelectFiles(job, retrievalRecord, fileSelectionIndexes);
    bestFormat = ImportingUtilities.guessBetterFormat(job, bestFormat);

    JSONArray rankedFormats = new JSONArray();
    JSONUtilities.safePut(config, "rankedFormats", rankedFormats);
    ImportingUtilities.rankFormats(job, bestFormat, rankedFormats);

    JSONUtilities.safePut(config, "state", "ready");
    JSONUtilities.safePut(config, "hasData", true);
    config.remove("progress");
}
/**
 * Stores a new file selection in the job's config and recomputes the ranked
 * list of formats ("rankedFormats") for that selection.
 *
 * @param job the job whose config is updated
 * @param fileSelectionArray the new selection, stored under "fileSelection"
 */
static public void updateJobWithNewFileSelection(ImportingJob job, JSONArray fileSelectionArray) {
    JSONUtilities.safePut(job.config, "fileSelection", fileSelectionArray);

    // Start from the format the selected files have in common, then let the guessers refine it.
    String commonFormat = getCommonFormatForSelectedFiles(job, fileSelectionArray);
    String refinedFormat = guessBetterFormat(job, commonFormat);

    JSONArray ranking = new JSONArray();
    JSONUtilities.safePut(job.config, "rankedFormats", ranking);
    rankFormats(job, refinedFormat, ranking);
}
/**
 * Retrieves all the data a POST request carries or points to and saves it
 * under the given directory, describing each piece with a file record.
 *
 * Three kinds of items are handled: the form field "clipboard" (pasted
 * text), the form field "download" (a URL to fetch, after it has been passed
 * through the registered URL rewriters) and uploaded files. Item counts are
 * stored in the retrieval record under "uploadCount", "downloadCount",
 * "clipboardCount" and "archiveCount"; the file records go under "files".
 *
 * @param request the multipart POST request
 * @param parameters URL parameters of the request (not used by this method)
 * @param rawDataDir directory to save the retrieved data in
 * @param retrievalRecord JSON object that receives the file records and counts
 * @param progress receives progress reports and is polled for cancellation
 * @throws FileUploadException if the request cannot be parsed as an upload
 * @throws IOException if reading, downloading or saving data fails
 */
static public void retrieveContentFromPostRequest(
    HttpServletRequest request,
    Properties parameters,
    File rawDataDir,
    JSONObject retrievalRecord,
    final Progress progress
) throws FileUploadException, IOException {
    JSONArray fileRecords = new JSONArray();
    JSONUtilities.safePut(retrievalRecord, "files", fileRecords);

    int clipboardCount = 0;
    int uploadCount = 0;
    int downloadCount = 0;
    int archiveCount = 0;

    // This tracks the total progress, which involves uploading data from the client
    // as well as downloading data from URLs.
    final SavingUpdate update = new SavingUpdate() {
        @Override
        public void savedMore() {
            progress.setProgress(null, calculateProgressPercent(totalExpectedSize, totalRetrievedSize));
        }
        @Override
        public boolean isCanceled() {
            return progress.isCanceled();
        }
    };

    DiskFileItemFactory fileItemFactory = new DiskFileItemFactory();
    fileItemFactory.setFileCleaningTracker(new FileCleaningTracker());

    ServletFileUpload upload = new ServletFileUpload(fileItemFactory);
    upload.setProgressListener(new ProgressListener() {
        boolean setContentLength = false;
        long lastBytesRead = 0;

        @Override
        public void update(long bytesRead, long contentLength, int itemCount) {
            if (!setContentLength) {
                // Only try to set the content length if we really know it.
                if (contentLength >= 0) {
                    update.totalExpectedSize += contentLength;
                    setContentLength = true;
                }
            }
            if (setContentLength) {
                update.totalRetrievedSize += (bytesRead - lastBytesRead);
                lastBytesRead = bytesRead;

                update.savedMore();
            }
        }
    });

    progress.setProgress("Uploading data ...", -1);
    for (Object obj : upload.parseRequest(request)) {
        if (progress.isCanceled()) {
            break;
        }

        FileItem fileItem = (FileItem) obj;
        InputStream stream = fileItem.getInputStream();

        String name = fileItem.getFieldName().toLowerCase();
        if (fileItem.isFormField()) {
            if (name.equals("clipboard")) {
                File file = allocateFile(rawDataDir, "clipboard.txt");

                JSONObject fileRecord = new JSONObject();
                JSONUtilities.safePut(fileRecord, "origin", "clipboard");
                JSONUtilities.safePut(fileRecord, "declaredEncoding", request.getCharacterEncoding());
                JSONUtilities.safePut(fileRecord, "declaredMimeType", (String) null);
                JSONUtilities.safePut(fileRecord, "format", "text");
                JSONUtilities.safePut(fileRecord, "fileName", "(clipboard)");
                JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));

                progress.setProgress("Uploading pasted clipboard text",
                    calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));

                JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));

                clipboardCount++;

                JSONUtilities.append(fileRecords, fileRecord);
            } else if (name.equals("download")) {
                // The field's value is the URL to download.
                String urlString = Streams.asString(stream);
                URL url = new URL(urlString);

                JSONObject fileRecord = new JSONObject();
                JSONUtilities.safePut(fileRecord, "origin", "download");
                JSONUtilities.safePut(fileRecord, "url", urlString);

                // A rewriter may replace the URL, fix its format, and declare that the
                // URL must not be downloaded at all (the record alone is then enough).
                boolean needsDownload = true;
                for (UrlRewriter rewriter : ImportingManager.urlRewriters) {
                    Result result = rewriter.rewrite(urlString);
                    if (result != null) {
                        urlString = result.rewrittenUrl;
                        url = new URL(urlString);

                        JSONUtilities.safePut(fileRecord, "url", urlString);
                        JSONUtilities.safePut(fileRecord, "format", result.format);
                        if (!result.download) {
                            needsDownload = false;
                            break;
                        }
                    }
                }

                if (!needsDownload) {
                    downloadCount++;
                    JSONUtilities.append(fileRecords, fileRecord);
                    continue; // on to the next item of the request
                }

                URLConnection urlConnection = url.openConnection();
                InputStream stream2 = urlConnection.getInputStream();
                try {
                    String fileName = url.getFile();
                    File file = allocateFile(rawDataDir, fileName);

                    int contentLength = urlConnection.getContentLength();
                    if (contentLength >= 0) {
                        update.totalExpectedSize += contentLength;
                    }

                    JSONUtilities.safePut(fileRecord, "declaredEncoding", urlConnection.getContentEncoding());
                    JSONUtilities.safePut(fileRecord, "declaredMimeType", urlConnection.getContentType());
                    JSONUtilities.safePut(fileRecord, "fileName", fileName);
                    JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));

                    progress.setProgress("Downloading " + urlString,
                        calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));

                    // Save what the URL connection delivers -- not the form field's own
                    // stream, which only held the URL and has already been consumed.
                    long actualLength = saveStreamToFile(stream2, file, update);
                    JSONUtilities.safePut(fileRecord, "size", actualLength);
                    // Correct the expected total by what was really received.
                    if (contentLength >= 0) {
                        update.totalExpectedSize += (actualLength - contentLength);
                    } else {
                        update.totalExpectedSize += actualLength;
                    }
                    progress.setProgress("Saving " + urlString + " locally",
                        calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));

                    if (postProcessRetrievedFile(file, fileRecord, fileRecords, progress)) {
                        archiveCount++;
                    }

                    downloadCount++;
                } finally {
                    stream2.close();
                }
            }
        } else { // is file content
            String fileName = fileItem.getName();
            if (fileName.length() > 0) {
                long fileSize = fileItem.getSize();

                File file = allocateFile(rawDataDir, fileName);

                JSONObject fileRecord = new JSONObject();
                JSONUtilities.safePut(fileRecord, "origin", "upload");
                JSONUtilities.safePut(fileRecord, "declaredEncoding", request.getCharacterEncoding());
                JSONUtilities.safePut(fileRecord, "declaredMimeType", fileItem.getContentType());
                JSONUtilities.safePut(fileRecord, "fileName", fileName);
                JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));

                progress.setProgress(
                    "Saving file " + fileName + " locally (" + formatBytes(fileSize) + " bytes)",
                    calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));

                JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
                if (postProcessRetrievedFile(file, fileRecord, fileRecords, progress)) {
                    archiveCount++;
                }

                uploadCount++;
            }
        }
    }

    JSONUtilities.safePut(retrievalRecord, "uploadCount", uploadCount);
    JSONUtilities.safePut(retrievalRecord, "downloadCount", downloadCount);
    JSONUtilities.safePut(retrievalRecord, "clipboardCount", clipboardCount);
    JSONUtilities.safePut(retrievalRecord, "archiveCount", archiveCount);
}
/**
 * Returns the path of {@code file} relative to {@code dir}, obtained by dropping
 * the directory's absolute path (and one leading separator, if present) from the
 * front of the file's absolute path. No check is made that {@code file} really
 * lies below {@code dir}.
 */
static public String getRelativePath(File file, File dir) {
    int prefixLength = dir.getAbsolutePath().length();
    String remainder = file.getAbsolutePath().substring(prefixLength);
    if (remainder.startsWith(File.separator)) {
        return remainder.substring(1);
    }
    return remainder;
}
/**
 * Picks a not-yet-existing file below {@code dir} for the given name and creates
 * its parent directories. If {@code name} is already taken, "-2", "-3", ... is
 * inserted before the first dot of the name until a free name is found.
 *
 * @param dir  directory the file must live in
 * @param name requested relative name (may contain sub-directories)
 * @return the allocated file (not created, only its parent directories are)
 * @throws IllegalArgumentException if {@code name} resolves to a location outside
 *         {@code dir} (e.g. an archive entry called "../../x"), or cannot be resolved
 */
static public File allocateFile(File dir, String name) {
    File file = new File(dir, name);

    // Names come from uploads and archive entries, i.e. untrusted input: refuse
    // anything that would escape the target directory ("zip slip").
    try {
        String root = dir.getCanonicalPath();
        String target = file.getCanonicalPath();
        String rootPrefix = root.endsWith(File.separator) ? root : root + File.separator;
        if (!target.equals(root) && !target.startsWith(rootPrefix)) {
            throw new IllegalArgumentException(
                "File name escapes the target directory: " + name);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Cannot resolve file name: " + name, e);
    }

    int dot = name.indexOf('.');
    String prefix = dot < 0 ? name : name.substring(0, dot);
    String suffix = dot < 0 ? "" : name.substring(dot);
    int index = 2;
    while (file.exists()) {
        file = new File(dir, prefix + "-" + index++ + suffix);
    }

    file.getParentFile().mkdirs();

    return file;
}
/**
 * Opens a reader on the file that {@code fileRecord} points to through its
 * "location" entry (empty string when absent), resolved against the job.
 */
static public Reader getFileReader(ImportingJob job, JSONObject fileRecord)
        throws FileNotFoundException {
    String location = JSONUtilities.getString(fileRecord, "location", "");
    File file = getFile(job, location);
    return getFileReader(file, fileRecord);
}
/**
 * Opens {@code file} and wraps it in a reader, using the encoding recorded in
 * {@code fileRecord} when there is one.
 */
static public Reader getFileReader(File file, JSONObject fileRecord) throws FileNotFoundException {
    InputStream rawStream = new FileInputStream(file);
    return getReaderFromStream(rawStream, fileRecord);
}
/**
 * Wraps {@code inputStream} in a reader. The charset is the one returned by
 * {@link #getEncoding}; when that is absent or not supported by the JVM, the
 * platform default charset is used instead.
 */
static public Reader getReaderFromStream(InputStream inputStream, JSONObject fileRecord) {
    String charsetName = getEncoding(fileRecord);
    if (charsetName == null) {
        return new InputStreamReader(inputStream);
    }
    try {
        return new InputStreamReader(inputStream, charsetName);
    } catch (UnsupportedEncodingException unsupported) {
        // Unknown charset name: fall back to the platform default.
        return new InputStreamReader(inputStream);
    }
}
/**
 * Resolves the "location" entry of {@code fileRecord} (empty string when absent)
 * against the job's raw data directory.
 */
static public File getFile(ImportingJob job, JSONObject fileRecord) {
    String location = JSONUtilities.getString(fileRecord, "location", "");
    return getFile(job, location);
}
/** Resolves {@code location} against the job's raw data directory. */
static public File getFile(ImportingJob job, String location) {
    File rawDataDir = job.getRawDataDir();
    return new File(rawDataDir, location);
}
/**
 * Returns a label for where a file came from: its "url" entry if present,
 * otherwise its "fileName" entry, otherwise the literal "unknown".
 */
static public String getFileSource(JSONObject fileRecord) {
    String fallback = JSONUtilities.getString(fileRecord, "fileName", "unknown");
    return JSONUtilities.getString(fileRecord, "url", fallback);
}
/**
 * Callback and running totals used while streams are being saved to disk.
 * {@code saveStreamToFile} adds to {@link #totalRetrievedSize}, calls
 * {@link #savedMore()} after each chunk and polls {@link #isCanceled()} before
 * reading the next one.
 */
private static abstract class SavingUpdate {
    /** Total number of bytes expected across all sources. */
    public long totalExpectedSize = 0;
    /** Total number of bytes written so far. */
    public long totalRetrievedSize = 0;

    /** Invoked after each chunk has been written. */
    public abstract void savedMore();

    /** Returns true when saving should stop. */
    public abstract boolean isCanceled();
}
/**
 * Copies {@code stream} into {@code file} in 4 KB chunks and returns the number
 * of bytes written. When {@code update} is non-null it is asked before every
 * read whether the copy was canceled, and is notified after every chunk.
 * The output file is always closed; the input stream is left open.
 */
static public long saveStreamToFile(InputStream stream, File file, SavingUpdate update) throws IOException {
    FileOutputStream out = new FileOutputStream(file);
    try {
        byte[] buffer = new byte[4096];
        long total = 0;
        while (true) {
            if (update != null && update.isCanceled()) {
                break;
            }
            int chunk = stream.read(buffer);
            if (chunk <= 0) {
                break;
            }
            out.write(buffer, 0, chunk);
            total += chunk;
            if (update != null) {
                update.totalRetrievedSize += chunk;
                update.savedMore();
            }
        }
        return total;
    } finally {
        out.close();
    }
}
/**
 * Post-processes a file that has just been saved into the raw data directory.
 * <ul>
 * <li>If it can be opened as an archive and exploded, its entries are appended to
 *     {@code fileRecords}, the archive itself is deleted and {@code true} is returned.</li>
 * <li>Otherwise, if it can be opened as a compressed file, it is replaced by its
 *     uncompressed content.</li>
 * <li>In every non-archive case the (possibly replaced) file gets its format
 *     determined and {@code fileRecord} is appended to {@code fileRecords}.</li>
 * </ul>
 *
 * @return true if the file was an archive that has been exploded, false otherwise
 */
static public boolean postProcessRetrievedFile(
        File file, JSONObject fileRecord, JSONArray fileRecords, final Progress progress) {

    String mimeType = JSONUtilities.getString(fileRecord, "declaredMimeType", null);
    File rawDataDir = file.getParentFile();

    InputStream archiveIS = tryOpenAsArchive(file, mimeType);
    if (archiveIS != null) {
        boolean exploded;
        try {
            exploded = explodeArchive(rawDataDir, archiveIS, fileRecord, fileRecords, progress);
        } finally {
            try {
                archiveIS.close();
            } catch (IOException e) {
                // Best effort: nothing useful can be done if closing fails.
            }
        }
        if (exploded) {
            // Delete only after the stream is closed; an open handle blocks
            // deletion on some platforms.
            file.delete();
            return true;
        }
    }

    InputStream uncompressedIS = tryOpenAsCompressedFile(file, mimeType);
    if (uncompressedIS != null) {
        File file2 = null;
        try {
            file2 = uncompressFile(rawDataDir, uncompressedIS, fileRecord, progress);
        } catch (IOException e) {
            // TODO: what to do?
            e.printStackTrace();
        } finally {
            try {
                // Close the stream opened for this step. The archive stream is
                // null here, so closing it would throw a NullPointerException.
                uncompressedIS.close();
            } catch (IOException e) {
                // Best effort: nothing useful can be done if closing fails.
            }
        }
        if (file2 != null) {
            file.delete();
            file = file2;
        }
    }

    postProcessSingleRetrievedFile(file, fileRecord);
    JSONUtilities.append(fileRecords, fileRecord);

    return false;
}
/**
 * Fills in the "format" entry of {@code fileRecord}, unless one is already set,
 * by asking {@code ImportingManager.getFormat} with the file's name and the
 * record's declared MIME type.
 */
static public void postProcessSingleRetrievedFile(File file, JSONObject fileRecord) {
    if (fileRecord.has("format")) {
        return;
    }
    JSONUtilities.safePut(
        fileRecord,
        "format",
        ImportingManager.getFormat(
            file.getName(),
            JSONUtilities.getString(fileRecord, "declaredMimeType", null)));
}
/**
 * Tries to open {@code file} as an archive, judging by its file name extension
 * (.tar.gz, .tgz, .tar.bz2, .tar, .zip).
 *
 * @param file     the candidate archive
 * @param mimeType declared MIME type (currently not consulted)
 * @return a {@code TarInputStream} or {@code ZipInputStream} positioned at the
 *         start of the archive, or null if the extension is not recognized or the
 *         file cannot be opened in the expected format
 */
static public InputStream tryOpenAsArchive(File file, String mimeType) {
    String fileName = file.getName();
    boolean tarGzip = fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz");
    boolean tarBzip2 = !tarGzip && fileName.endsWith(".tar.bz2");
    boolean tar = !tarGzip && !tarBzip2 && fileName.endsWith(".tar");
    boolean zip = !tarGzip && !tarBzip2 && !tar && fileName.endsWith(".zip");
    if (!tarGzip && !tarBzip2 && !tar && !zip) {
        return null;
    }

    FileInputStream fis = null;
    try {
        fis = new FileInputStream(file);
        if (tarGzip) {
            return new TarInputStream(new GZIPInputStream(fis));
        } else if (tarBzip2) {
            // Ant's CBZip2InputStream expects the caller to have consumed the
            // two-byte "BZ" magic that precedes the bzip2 stream.
            if (fis.read() != 'B' || fis.read() != 'Z') {
                throw new IOException("Not a bzip2 file: " + fileName);
            }
            return new TarInputStream(new CBZip2InputStream(fis));
        } else if (tar) {
            return new TarInputStream(fis);
        } else {
            return new ZipInputStream(fis);
        }
    } catch (IOException e) {
        // The file is not what its extension claims (or is unreadable). Release
        // the file handle instead of leaking it, and report "not an archive".
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException ignored) {
                // Nothing more can be done.
            }
        }
        return null;
    }
}
/**
 * Extracts every non-directory entry of a tar or zip stream into
 * {@code rawDataDir}, appending one file record per entry to {@code fileRecords}.
 * Extraction stops early when {@code progress} reports cancellation. An
 * IOException during extraction is printed and ends the loop, but the method
 * still returns true.
 *
 * @return true if {@code archiveIS} is a {@code TarInputStream} or
 *         {@code ZipInputStream}, false for any other stream type
 */
static public boolean explodeArchive(
    File rawDataDir,
    InputStream archiveIS,
    JSONObject archiveFileRecord,
    JSONArray fileRecords,
    final Progress progress
) {
    if (archiveIS instanceof TarInputStream) {
        TarInputStream tarStream = (TarInputStream) archiveIS;
        try {
            while (!progress.isCanceled()) {
                TarEntry entry = tarStream.getNextEntry();
                if (entry == null) {
                    break;
                }
                if (entry.isDirectory()) {
                    continue;
                }
                saveArchiveEntry(
                    rawDataDir, tarStream, entry.getName(), archiveFileRecord, fileRecords, progress);
            }
        } catch (IOException e) {
            // TODO: what to do?
            e.printStackTrace();
        }
        return true;
    }

    if (archiveIS instanceof ZipInputStream) {
        ZipInputStream zipStream = (ZipInputStream) archiveIS;
        try {
            while (!progress.isCanceled()) {
                ZipEntry entry = zipStream.getNextEntry();
                if (entry == null) {
                    break;
                }
                if (entry.isDirectory()) {
                    continue;
                }
                saveArchiveEntry(
                    rawDataDir, zipStream, entry.getName(), archiveFileRecord, fileRecords, progress);
            }
        } catch (IOException e) {
            // TODO: what to do?
            e.printStackTrace();
        }
        return true;
    }

    return false;
}

/**
 * Saves the archive entry currently readable from {@code entryStream} under
 * {@code entryName}, then builds and appends its file record.
 */
static private void saveArchiveEntry(
    File rawDataDir,
    InputStream entryStream,
    String entryName,
    JSONObject archiveFileRecord,
    JSONArray fileRecords,
    Progress progress
) throws IOException {
    File target = allocateFile(rawDataDir, entryName);

    progress.setProgress("Extracting " + entryName, -1);

    JSONObject entryRecord = new JSONObject();
    JSONUtilities.safePut(entryRecord, "origin", JSONUtilities.getString(archiveFileRecord, "origin", null));
    JSONUtilities.safePut(entryRecord, "declaredEncoding", (String) null);
    JSONUtilities.safePut(entryRecord, "declaredMimeType", (String) null);
    JSONUtilities.safePut(entryRecord, "fileName", entryName);
    JSONUtilities.safePut(entryRecord, "archiveFileName", JSONUtilities.getString(archiveFileRecord, "fileName", null));
    JSONUtilities.safePut(entryRecord, "location", getRelativePath(target, rawDataDir));
    JSONUtilities.safePut(entryRecord, "size", saveStreamToFile(entryStream, target, null));

    postProcessSingleRetrievedFile(target, entryRecord);
    JSONUtilities.append(fileRecords, entryRecord);
}
/**
 * Tries to open {@code file} as a single compressed file, judging by its file
 * name extension (.gz or .bz2).
 *
 * @param file     the candidate compressed file
 * @param mimeType declared MIME type (currently not consulted)
 * @return a stream yielding the uncompressed content, or null if the extension
 *         is not recognized or the file cannot be opened in the expected format
 */
static public InputStream tryOpenAsCompressedFile(File file, String mimeType) {
    String fileName = file.getName();
    boolean gzip = fileName.endsWith(".gz");
    boolean bzip2 = !gzip && fileName.endsWith(".bz2");
    if (!gzip && !bzip2) {
        return null;
    }

    FileInputStream fis = null;
    try {
        fis = new FileInputStream(file);
        if (gzip) {
            return new GZIPInputStream(fis);
        }
        // Ant's CBZip2InputStream expects the caller to have consumed the
        // two-byte "BZ" magic that precedes the bzip2 stream.
        if (fis.read() != 'B' || fis.read() != 'Z') {
            throw new IOException("Not a bzip2 file: " + fileName);
        }
        return new CBZip2InputStream(fis);
    } catch (IOException e) {
        // The file is not what its extension claims (or is unreadable). Release
        // the file handle instead of leaking it, and report "not compressed".
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException ignored) {
                // Nothing more can be done.
            }
        }
        return null;
    }
}
/**
 * Writes the content of {@code uncompressedIS} to a newly allocated file in
 * {@code rawDataDir} and points {@code fileRecord} at it.
 * <p>
 * The new file is named after the record's "fileName" with a trailing ".gz" or
 * ".bz2" removed, so that extension-based format detection sees the real
 * extension (e.g. "data.csv" rather than "data-2.csv.gz"). The record's
 * declared encoding and MIME type are cleared because they described the
 * compressed container, and "location" is updated to the new file.
 *
 * @return the file holding the uncompressed content
 * @throws IOException if the content cannot be read or written
 */
static public File uncompressFile(
    File rawDataDir,
    InputStream uncompressedIS,
    JSONObject fileRecord,
    final Progress progress
) throws IOException {
    String fileName = JSONUtilities.getString(fileRecord, "fileName", "unknown");

    String targetName = fileName;
    for (String extension : new String[] { ".gz", ".bz2" }) {
        // Keep the name untouched if stripping would leave nothing.
        if (targetName.endsWith(extension) && targetName.length() > extension.length()) {
            targetName = targetName.substring(0, targetName.length() - extension.length());
            break;
        }
    }

    File file2 = allocateFile(rawDataDir, targetName);

    progress.setProgress("Uncompressing " + fileName, -1);

    saveStreamToFile(uncompressedIS, file2, null);

    JSONUtilities.safePut(fileRecord, "declaredEncoding", (String) null);
    JSONUtilities.safePut(fileRecord, "declaredMimeType", (String) null);
    JSONUtilities.safePut(fileRecord, "location", getRelativePath(file2, rawDataDir));

    return file2;
}
/**
 * Converts retrieved/expected byte counts into a whole percentage, or -1 when
 * the expected size is zero.
 */
static private int calculateProgressPercent(long totalExpectedSize, long totalRetrievedSize) {
    if (totalExpectedSize == 0) {
        return -1;
    }
    long percent = totalRetrievedSize * 100 / totalExpectedSize;
    return (int) percent;
}
/** Formats a byte count with the default locale's integer number format. */
static private String formatBytes(long bytes) {
    NumberFormat integerFormat = NumberFormat.getIntegerInstance();
    return integerFormat.format(bytes);
}
/**
 * Returns the record's "encoding" entry, falling back to "declaredEncoding";
 * null when neither is present.
 */
static public String getEncoding(JSONObject fileRecord) {
    String explicit = JSONUtilities.getString(fileRecord, "encoding", null);
    if (explicit != null) {
        return explicit;
    }
    return JSONUtilities.getString(fileRecord, "declaredEncoding", null);
}
/**
 * Chooses which retrieved files to select by default and appends their indexes
 * to {@code fileSelectionIndexes}.
 * <p>
 * The most frequent "format" among the records under "files" is the best format.
 * When the retrieval record's "archiveCount" is 0 every file is selected;
 * otherwise only the files whose format equals the best format are.
 *
 * @return the most frequent format, or null if no record has a format
 */
static public String autoSelectFiles(ImportingJob job, JSONObject retrievalRecord, JSONArray fileSelectionIndexes) {
    final Map<String, Integer> tally = new HashMap<String, Integer>();
    List<String> seenFormats = new ArrayList<String>();

    JSONArray fileRecords = JSONUtilities.getArray(retrievalRecord, "files");
    int total = fileRecords.length();
    for (int i = 0; i < total; i++) {
        JSONObject record = JSONUtilities.getObjectElement(fileRecords, i);
        String format = JSONUtilities.getString(record, "format", null);
        if (format == null) {
            continue;
        }
        Integer soFar = tally.get(format);
        if (soFar == null) {
            tally.put(format, 1);
            seenFormats.add(format);
        } else {
            tally.put(format, soFar + 1);
        }
    }

    // Most frequent format first.
    Collections.sort(seenFormats, new Comparator<String>() {
        @Override
        public int compare(String left, String right) {
            return tally.get(right) - tally.get(left);
        }
    });
    String bestFormat = seenFormats.isEmpty() ? null : seenFormats.get(0);

    boolean selectEverything = JSONUtilities.getInt(retrievalRecord, "archiveCount", 0) == 0;
    for (int i = 0; i < total; i++) {
        if (selectEverything) {
            // No archive involved: take every file.
            JSONUtilities.append(fileSelectionIndexes, i);
        } else {
            // Archive content: take only files of the dominant format.
            JSONObject record = JSONUtilities.getObjectElement(fileRecords, i);
            String format = JSONUtilities.getString(record, "format", null);
            if (format != null && format.equals(bestFormat)) {
                JSONUtilities.append(fileSelectionIndexes, i);
            }
        }
    }
    return bestFormat;
}
/**
 * Returns the most frequent "format" among the file records of the job's
 * retrieval record that {@code fileSelectionIndexes} refers to. Indexes that
 * are out of range are ignored.
 *
 * @return the most frequent format, or null if no selected record has a format
 */
static public String getCommonFormatForSelectedFiles(ImportingJob job, JSONArray fileSelectionIndexes) {
    JSONObject retrievalRecord = JSONUtilities.getObject(job.config, "retrievalRecord");

    final Map<String, Integer> tally = new HashMap<String, Integer>();
    List<String> seenFormats = new ArrayList<String>();

    JSONArray fileRecords = JSONUtilities.getArray(retrievalRecord, "files");
    int selectionSize = fileSelectionIndexes.length();
    for (int i = 0; i < selectionSize; i++) {
        int index = JSONUtilities.getIntElement(fileSelectionIndexes, i, -1);
        if (index < 0 || index >= fileRecords.length()) {
            continue;
        }
        JSONObject record = JSONUtilities.getObjectElement(fileRecords, index);
        String format = JSONUtilities.getString(record, "format", null);
        if (format == null) {
            continue;
        }
        Integer soFar = tally.get(format);
        if (soFar == null) {
            tally.put(format, 1);
            seenFormats.add(format);
        } else {
            tally.put(format, soFar + 1);
        }
    }

    // Most frequent format first.
    Collections.sort(seenFormats, new Comparator<String>() {
        @Override
        public int compare(String left, String right) {
            return tally.get(right) - tally.get(left);
        }
    });

    if (seenFormats.isEmpty()) {
        return null;
    }
    return seenFormats.get(0);
}
/**
 * Refines {@code bestFormat} using the job's "retrievalRecord"; returns
 * {@code bestFormat} unchanged when the job has no such record.
 */
static String guessBetterFormat(ImportingJob job, String bestFormat) {
    JSONObject retrievalRecord = JSONUtilities.getObject(job.config, "retrievalRecord");
    if (retrievalRecord == null) {
        return bestFormat;
    }
    return guessBetterFormat(job, retrievalRecord, bestFormat);
}
/**
 * Refines {@code bestFormat} using the "files" array of {@code retrievalRecord};
 * returns {@code bestFormat} unchanged when there is no such array.
 */
static String guessBetterFormat(ImportingJob job, JSONObject retrievalRecord, String bestFormat) {
    JSONArray fileRecords = JSONUtilities.getArray(retrievalRecord, "files");
    if (fileRecords == null) {
        return bestFormat;
    }
    return guessBetterFormat(job, fileRecords, bestFormat);
}
/**
 * Repeatedly asks the format guessers registered for the current best format to
 * examine the first file record's file, adopting each differing suggestion,
 * until no guesser proposes a change.
 *
 * @return the refined format, or {@code bestFormat} unchanged when it is null,
 *         there are no file records, or the first record has no "location"
 */
static String guessBetterFormat(ImportingJob job, JSONArray fileRecords, String bestFormat) {
    if (bestFormat == null || fileRecords == null || fileRecords.length() == 0) {
        return bestFormat;
    }

    JSONObject firstFileRecord = JSONUtilities.getObjectElement(fileRecords, 0);
    String encoding = getEncoding(firstFileRecord);
    String location = JSONUtilities.getString(firstFileRecord, "location", null);
    if (location == null) {
        return bestFormat;
    }

    File file = new File(job.getRawDataDir(), location);
    boolean improved;
    do {
        improved = false;

        // The first guesser with an opinion wins this round.
        String candidate = null;
        List<FormatGuesser> guessers = ImportingManager.formatToGuessers.get(bestFormat);
        if (guessers != null) {
            for (FormatGuesser guesser : guessers) {
                candidate = guesser.guess(file, encoding, bestFormat);
                if (candidate != null) {
                    break;
                }
            }
        }

        if (candidate != null && !candidate.equals(bestFormat)) {
            bestFormat = candidate;
            improved = true;
        }
    } while (improved);

    return bestFormat;
}
/**
 * Appends to {@code rankedFormats} every registered format that has both a UI
 * class and a parser and whose {@code download} flag matches that of
 * {@code bestFormat} (or is true when {@code bestFormat} is null).
 * <p>
 * Ordering: alphabetical when {@code bestFormat} is null; otherwise
 * {@code bestFormat} first, then formats sharing more leading "/"-separated
 * segments with it, ties broken alphabetically.
 */
static void rankFormats(ImportingJob job, final String bestFormat, JSONArray rankedFormats) {
final Map<String, String[]> formatToSegments = new HashMap<String, String[]>();
// NOTE(review): if bestFormat is non-null but not a key of formatToRecord, get()
// presumably returns null and the field access below throws NPE - confirm callers
// only pass registered formats.
boolean download = bestFormat == null ? true :
ImportingManager.formatToRecord.get(bestFormat).download;
List<String> formats = new ArrayList<String>(ImportingManager.formatToRecord.keySet().size());
for (String format : ImportingManager.formatToRecord.keySet()) {
Format record = ImportingManager.formatToRecord.get(format);
if (record.uiClass != null && record.parser != null && record.download == download) {
formats.add(format);
// Pre-split so the comparator does not split on every comparison.
formatToSegments.put(format, format.split("/"));
}
}
if (bestFormat == null) {
Collections.sort(formats);
} else {
Collections.sort(formats, new Comparator<String>() {
@Override
public int compare(String format1, String format2) {
// The best format always sorts first.
if (format1.equals(bestFormat)) {
return -1;
} else if (format2.equals(bestFormat)) {
return 1;
} else {
return compareBySegments(format1, format2);
}
}
// More segments in common with the best format sorts earlier; ties are alphabetical.
int compareBySegments(String format1, String format2) {
int c = commonSegments(format2) - commonSegments(format1);
return c != 0 ? c : format1.compareTo(format2);
}
// Counts the leading segments that 'format' shares with the best format.
// Returns 0 when either format has no entry in formatToSegments (only
// formats that passed the filter above were added to that map).
int commonSegments(String format) {
String[] bestSegments = formatToSegments.get(bestFormat);
String[] segments = formatToSegments.get(format);
if (bestSegments == null || segments == null) {
return 0;
} else {
int i;
for (i = 0; i < bestSegments.length && i < segments.length; i++) {
if (!bestSegments[i].equals(segments[i])) {
break;
}
}
return i;
}
}
});
}
for (String format : formats) {
JSONUtilities.append(rankedFormats, format);
}
}
/**
 * Collects the file records of the job's retrieval record that are referenced
 * by the job's "fileSelection" index array. Out-of-range indexes are skipped.
 *
 * @return the selected records in selection order; empty when the retrieval
 *         record, its "files" array or the selection array is missing
 */
static public List<JSONObject> getSelectedFileRecords(ImportingJob job) {
    List<JSONObject> selected = new ArrayList<JSONObject>();

    JSONObject retrievalRecord = JSONUtilities.getObject(job.config, "retrievalRecord");
    if (retrievalRecord == null) {
        return selected;
    }
    JSONArray allRecords = JSONUtilities.getArray(retrievalRecord, "files");
    if (allRecords == null) {
        return selected;
    }
    JSONArray selection = JSONUtilities.getArray(job.config, "fileSelection");
    if (selection == null) {
        return selected;
    }

    for (int i = 0; i < selection.length(); i++) {
        int index = JSONUtilities.getIntElement(selection, i, -1);
        boolean inRange = index >= 0 && index < allRecords.length();
        if (inRange) {
            selected.add(JSONUtilities.getObjectElement(allRecords, index));
        }
    }
    return selected;
}
/**
 * Parses the job's selected files with the parser registered for {@code format}
 * into a fresh preview project, passing 100 as the parser's limit argument, and
 * then updates the project. Does nothing when the format has no parser.
 */
static public void previewParse(ImportingJob job, String format, JSONObject optionObj, List<Exception> exceptions) {
    Format record = ImportingManager.formatToRecord.get(format);
    boolean parsable = record != null && record.parser != null;
    if (!parsable) {
        // TODO: what to do?
        return;
    }

    job.prepareNewProject();

    List<JSONObject> selectedFileRecords = getSelectedFileRecords(job);
    record.parser.parse(
        job.project, job.metadata, job, selectedFileRecords, format, 100, optionObj, exceptions);

    job.project.update(); // update all internal models, indexes, caches, etc.
}
/**
 * Starts creating a project from the job's selected files on a background
 * thread and returns immediately.
 * <p>
 * The job's "state" is set to "creating-project" before the thread starts. When
 * parsing finishes without the job being canceled, the project is updated and
 * registered, and the job's "projectID" and "state" ("created-project") are set.
 *
 * @return the id of the project being created, or -1 if {@code format} has no parser
 */
static public long createProject(
        final ImportingJob job,
        final String format,
        final JSONObject optionObj,
        final List<Exception> exceptions) {
    final Format record = ImportingManager.formatToRecord.get(format);
    if (record == null || record.parser == null) {
        // TODO: what to do?
        return -1;
    }

    JSONUtilities.safePut(job.config, "state", "creating-project");

    final Project project = new Project();
    Runnable creation = new Runnable() {
        public void run() {
            ProjectMetadata metadata = new ProjectMetadata();
            metadata.setName(JSONUtilities.getString(optionObj, "projectName", "Untitled"));
            metadata.setEncoding(JSONUtilities.getString(optionObj, "encoding", "UTF-8"));

            // -1 is passed as the parser's limit argument here.
            record.parser.parse(
                project, metadata, job, getSelectedFileRecords(job), format, -1, optionObj, exceptions);

            if (job.canceled) {
                return;
            }
            project.update(); // update all internal models, indexes, caches, etc.
            ProjectManager.singleton.registerProject(project, metadata);
            JSONUtilities.safePut(job.config, "projectID", project.id);
            JSONUtilities.safePut(job.config, "state", "created-project");
        }
    };
    new Thread(creation).start();

    return project.id;
}
/**
 * Stores a progress message and percentage in the job's "progress" object,
 * creating that object in the job configuration if it does not exist yet.
 */
static public void setCreatingProjectProgress(ImportingJob job, String message, int percent) {
    JSONObject progressRecord = JSONUtilities.getObject(job.config, "progress");
    boolean missing = progressRecord == null;
    if (missing) {
        progressRecord = new JSONObject();
        JSONUtilities.safePut(job.config, "progress", progressRecord);
    }
    JSONUtilities.safePut(progressRecord, "message", message);
    JSONUtilities.safePut(progressRecord, "percent", percent);
}
}

View File

@ -31,23 +31,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
var theImportJob = {};
var ui = {};
var Refine = {
};
function resize() {
var header = $("#header");
var leftPanelWidth = 300;
var width = $(window).width();
var top = $("#header").outerHeight();
var height = $(window).height() - top;
package com.google.refine.importing;
public interface UrlRewriter {
static public class Result {
public String rewrittenUrl;
public String format;
public boolean download;
}
function onLoad() {
$(window).bind("resize", resize);
public Result rewrite(String url);
}
$(onLoad);

View File

@ -1,62 +0,0 @@
package com.google.refine.model.meta;
import java.io.File;
import java.io.InputStream;
import java.util.Date;
import java.util.Properties;
import javax.servlet.http.HttpServletRequest;
import org.apache.commons.fileupload.FileItemIterator;
import org.apache.commons.fileupload.FileItemStream;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.commands.importing.ImportJob;
/**
 * Import source whose content arrives as a file in a multipart form upload.
 */
public class FileUploadImportSource extends ImportSource {
    /** File name as sent by the client. */
    public String originalFileName;

    @Override
    protected void customWrite(JSONWriter writer, Properties options)
            throws JSONException {
        writer.key("originalFileName");
        writer.value(originalFileName);
    }

    @Override
    protected void customReconstruct(JSONObject obj) throws JSONException {
        if (obj.has("originalFileName")) {
            originalFileName = obj.getString("originalFileName");
        }
    }

    /**
     * Saves every uploaded file item of {@code request} that has a non-empty
     * file name to "data" under the job directory (a directory when the upload
     * is an archive) and records its metadata on this source.
     */
    @Override
    public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job) throws Exception {
        ServletFileUpload upload = new ServletFileUpload();
        FileItemIterator iter = upload.getItemIterator(request);
        while (iter.hasNext()) {
            FileItemStream item = iter.next();
            if (item.isFormField()) {
                continue;
            }
            String fileName = item.getName();
            if (fileName == null || fileName.length() == 0) {
                continue;
            }

            InputStream stream = item.openStream();
            try {
                File file = new File(job.dir, "data");

                this.accessTime = new Date();
                this.contentType = item.getContentType();
                this.encoding = request.getCharacterEncoding();
                this.originalFileName = fileName;
                // Read from the stream opened above; opening the item a second
                // time would bypass the close() in the finally block.
                this.size = saveStreamToFileOrDir(
                    stream, file, this.contentType, fileName, job, request.getContentLength());
                this.isArchive = file.isDirectory();
            } finally {
                stream.close();
            }
        }
    }
}

View File

@ -1,5 +0,0 @@
package com.google.refine.model.meta;
/** Empty placeholder class; it declares no fields or methods. */
public class ImportConfig {
}

View File

@ -1,167 +0,0 @@
package com.google.refine.model.meta;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.Properties;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.servlet.http.HttpServletRequest;
import org.apache.tools.bzip2.CBZip2InputStream;
import org.apache.tools.tar.TarEntry;
import org.apache.tools.tar.TarInputStream;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.Jsonizable;
import com.google.refine.commands.importing.ImportJob;
import com.google.refine.commands.importing.ImportManager;
import com.google.refine.util.ParsingUtilities;
/**
 * Base class describing where imported content came from, with helpers for
 * saving that content (plain file or tar/zip archive) to disk.
 */
abstract public class ImportSource implements Jsonizable {
    public Date accessTime;
    public long size;
    public boolean isArchive = false;
    public String contentType;
    public String encoding;

    /**
     * Writes the common fields followed by the subclass-specific fields
     * contributed by {@link #customWrite}.
     */
    @Override
    public void write(JSONWriter writer, Properties options)
            throws JSONException {
        writer.object();
        writer.key("type"); writer.value(ImportManager.getImportSourceClassName(this.getClass()));
        writer.key("accessTime"); writer.value(ParsingUtilities.dateToString(accessTime));
        writer.key("size"); writer.value(size);
        writer.key("isArchive"); writer.value(isArchive);
        writer.key("contentType"); writer.value(contentType);
        writer.key("encoding"); writer.value(encoding);
        // Counterpart of the customReconstruct() call in reconstruct(); without
        // it subclass fields are read back but never written.
        customWrite(writer, options);
        writer.endObject();
    }

    /** Restores the common fields present in {@code obj}, then the subclass fields. */
    public void reconstruct(JSONObject obj) throws JSONException {
        if (obj.has("accessTime")) {
            accessTime = ParsingUtilities.stringToDate(obj.getString("accessTime"));
        }
        if (obj.has("size")) {
            size = obj.getLong("size");
        }
        if (obj.has("isArchive")) {
            isArchive = obj.getBoolean("isArchive");
        }
        if (obj.has("contentType")) {
            contentType = obj.getString("contentType");
        }
        if (obj.has("encoding")) {
            encoding = obj.getString("encoding");
        }
        customReconstruct(obj);
    }

    abstract public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job)
        throws Exception;

    abstract protected void customWrite(JSONWriter writer, Properties options) throws JSONException;

    abstract protected void customReconstruct(JSONObject obj) throws JSONException;

    /**
     * Saves {@code is} to {@code file}. When {@code fileNameOrUrl} ends in a
     * known archive extension, the stream is unpacked and {@code file} becomes a
     * directory holding the entries; otherwise it is written as a single file.
     *
     * @param expectedSize expected number of bytes, or a value &lt;= 0 if unknown
     * @return the number of bytes saved, as accumulated in {@code job.bytesSaved}
     * @throws IOException on read/write failure, or if an archive entry would be
     *         written outside {@code file}
     */
    static protected long saveStreamToFileOrDir(
        InputStream is,
        File file,
        String contentType,
        String fileNameOrUrl,
        ImportJob job,
        long expectedSize
    ) throws IOException {
        InputStream archiveIS = null;
        if (fileNameOrUrl != null) {
            try {
                if (fileNameOrUrl.endsWith(".tar.gz") ||
                        fileNameOrUrl.endsWith(".tar.gz.gz") ||
                        fileNameOrUrl.endsWith(".tgz")) {
                    archiveIS = new TarInputStream(new GZIPInputStream(is));
                } else if (fileNameOrUrl.endsWith(".tar.bz2")) {
                    archiveIS = new TarInputStream(new CBZip2InputStream(is));
                } else if (fileNameOrUrl.endsWith(".tar")) {
                    archiveIS = new TarInputStream(is);
                } else if (fileNameOrUrl.endsWith(".zip")) {
                    archiveIS = new ZipInputStream(is);
                }
            } catch (IOException e) {
                archiveIS = null;
            }
        }

        job.bytesSaved = 0;
        if (archiveIS == null) {
            saveStreamToFile(is, file, job, true, expectedSize);
        } else {
            job.retrievingProgress = -1;

            // NOTE(SM): unfortunately, java.io does not provide any generalized class for
            // archive-like input streams so while both TarInputStream and ZipInputStream
            // behave precisely the same, there is no polymorphic behavior so we have
            // to treat each instance explicitly... one of those times you wish you had
            // closures
            if (archiveIS instanceof TarInputStream) {
                TarInputStream tis = (TarInputStream) archiveIS;
                TarEntry te;
                while ((te = tis.getNextEntry()) != null) {
                    if (!te.isDirectory()) {
                        saveStreamToFile(tis, resolveArchiveEntry(file, te.getName()), job, false, 0);
                    }
                }
            } else if (archiveIS instanceof ZipInputStream) {
                ZipInputStream zis = (ZipInputStream) archiveIS;
                ZipEntry ze;
                long compressedSize = 0;
                while ((ze = zis.getNextEntry()) != null) {
                    if (!ze.isDirectory()) {
                        saveStreamToFile(zis, resolveArchiveEntry(file, ze.getName()), job, false, 0);
                        compressedSize += ze.getCompressedSize(); // this might be negative if not known
                        // Only compute a percentage when the total is known;
                        // an expected size of 0 would divide by zero.
                        if (compressedSize > 0 && expectedSize > 0) {
                            job.retrievingProgress = (int) (compressedSize * 100 / expectedSize);
                        }
                    }
                }
            }
        }
        return job.bytesSaved;
    }

    /**
     * Resolves an archive entry name below {@code dir}, rejecting names such as
     * "../x" that would place the entry outside of it.
     */
    static private File resolveArchiveEntry(File dir, String entryName) throws IOException {
        File target = new File(dir, entryName);
        String root = dir.getCanonicalPath();
        if (!target.getCanonicalPath().startsWith(root + File.separator)) {
            throw new IOException("Archive entry escapes the target directory: " + entryName);
        }
        return target;
    }

    /**
     * Copies {@code is} to {@code file}, adding to {@code job.bytesSaved} and,
     * when {@code updateProgress} is set, updating {@code job.retrievingProgress}
     * (-1 when {@code expectedSize} is not positive).
     */
    static private void saveStreamToFile(
        InputStream is,
        File file,
        ImportJob job,
        boolean updateProgress,
        long expectedSize
    ) throws IOException {
        byte data[] = new byte[4096];

        file.getParentFile().mkdirs();

        FileOutputStream fos = new FileOutputStream(file);
        BufferedOutputStream bos = new BufferedOutputStream(fos, data.length);
        try {
            int count;
            while ((count = is.read(data, 0, data.length)) != -1) {
                bos.write(data, 0, count);

                job.bytesSaved += count;
                if (updateProgress) {
                    job.retrievingProgress = expectedSize > 0
                        ? (int) (job.bytesSaved * 100 / expectedSize)
                        : -1;
                }
            }
            bos.flush();
        } finally {
            // Release the file handle even when reading or writing fails.
            bos.close();
        }
    }
}

View File

@ -1,28 +0,0 @@
package com.google.refine.model.meta;
import java.util.Properties;
import javax.servlet.http.HttpServletRequest;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.commands.importing.ImportJob;
/**
 * Import source for text content. It contributes no extra JSON fields and its
 * content retrieval is not implemented.
 */
public class TextImportSource extends ImportSource {

    /** Writes nothing: this source has no fields of its own. */
    @Override
    protected void customWrite(JSONWriter writer, Properties options) throws JSONException {
    }

    /** Reads nothing: this source has no fields of its own. */
    @Override
    protected void customReconstruct(JSONObject obj) throws JSONException {
    }

    /** Not implemented; currently a no-op. */
    @Override
    public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job)
            throws Exception {
        // TODO Auto-generated method stub
    }
}

View File

@ -1,34 +0,0 @@
package com.google.refine.model.meta;
import java.util.Properties;
import javax.servlet.http.HttpServletRequest;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.google.refine.commands.importing.ImportJob;
/**
 * Import source identified by a URL. Only the URL is serialized; content
 * retrieval is not implemented.
 */
public class WebImportSource extends ImportSource {
    /** Address the content comes from. */
    public String url;

    /** Adds the "url" field to the JSON object being written. */
    @Override
    protected void customWrite(JSONWriter writer, Properties options) throws JSONException {
        writer.key("url");
        writer.value(url);
    }

    /** Restores {@link #url} when the JSON object carries a "url" field. */
    @Override
    protected void customReconstruct(JSONObject obj) throws JSONException {
        if (!obj.has("url")) {
            return;
        }
        url = obj.getString("url");
    }

    /** Not implemented; currently a no-op. */
    @Override
    public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job)
            throws Exception {
        // TODO Auto-generated method stub
    }
}

View File

@ -35,8 +35,10 @@ package com.google.refine.util;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Map;
import org.json.JSONArray;
import org.json.JSONException;
@ -44,6 +46,14 @@ import org.json.JSONObject;
import org.json.JSONWriter;
public class JSONUtilities {
/**
 * Returns the JSON object stored under {@code key}, or null when the key is
 * missing or its value is not a JSON object.
 */
static public JSONObject getObject(JSONObject obj, String key) {
    return obj.optJSONObject(key);
}
static public String getString(JSONObject obj, String key, String def) {
try {
return obj.getString(key);
@ -94,6 +104,14 @@ public class JSONUtilities {
}
}
/**
 * Returns the JSON array stored under {@code key}, or null when the key is
 * missing or its value is not a JSON array.
 */
static public JSONArray getArray(JSONObject obj, String key) {
    return obj.optJSONArray(key);
}
static public int[] getIntArray(JSONObject obj, String key) {
try {
JSONArray a = obj.getJSONArray(key);
@ -144,6 +162,14 @@ public class JSONUtilities {
writer.endArray();
}
/** Writes {@code strings} to {@code writer} as a JSON array, in order. */
static public void writeStringArray(JSONWriter writer, String[] strings) throws JSONException {
    writer.array();
    for (int i = 0; i < strings.length; i++) {
        writer.value(strings[i]);
    }
    writer.endArray();
}
static public void putField(JSONObject obj, String key, Object value) throws JSONException {
if (value instanceof Integer) {
obj.put(key, ((Integer) value).intValue());
@ -164,6 +190,135 @@ public class JSONUtilities {
}
}
/**
 * Returns the JSON object at index {@code i} of {@code a}, or null when the
 * index is out of range or the element is not a JSON object.
 */
static public JSONObject getObjectElement(JSONArray a, int i) {
    return a.optJSONObject(i);
}
/**
 * Returns the int at index {@code i} of {@code a}, or {@code def} when the
 * array reports a JSONException for that index.
 */
static public int getIntElement(JSONArray a, int i, int def) {
    int result = def;
    try {
        result = a.getInt(i);
    } catch (JSONException notAnInt) {
        // Keep the default.
    }
    return result;
}
/** Appends a JSON object to the end of {@code a}; a JSONException is ignored. */
static public void append(JSONArray a, JSONObject element) {
    int end = a.length();
    try {
        a.put(end, element);
    } catch (JSONException ignored) {
    }
}

/** Appends an arbitrary value to the end of {@code a}; a JSONException is ignored. */
static public void append(JSONArray a, Object element) {
    int end = a.length();
    try {
        a.put(end, element);
    } catch (JSONException ignored) {
    }
}

/** Appends an int to the end of {@code a}; a JSONException is ignored. */
static public void append(JSONArray a, int element) {
    int end = a.length();
    try {
        a.put(end, element);
    } catch (JSONException ignored) {
    }
}

/** Appends a long to the end of {@code a}; a JSONException is ignored. */
static public void append(JSONArray a, long element) {
    int end = a.length();
    try {
        a.put(end, element);
    } catch (JSONException ignored) {
    }
}

/** Appends a double to the end of {@code a}; a JSONException is ignored. */
static public void append(JSONArray a, double element) {
    int end = a.length();
    try {
        a.put(end, element);
    } catch (JSONException ignored) {
    }
}

/** Appends a boolean to the end of {@code a}; a JSONException is ignored. */
static public void append(JSONArray a, boolean element) {
    int end = a.length();
    try {
        a.put(end, element);
    } catch (JSONException ignored) {
    }
}

/** Appends a string to the end of {@code a}; a JSONException is ignored. */
static public void append(JSONArray a, String element) {
    int end = a.length();
    try {
        a.put(end, element);
    } catch (JSONException ignored) {
    }
}
/*
 * safePut overloads: each stores a value under a key and swallows the
 * JSONException that JSONObject.put may throw, so callers need no try/catch.
 */

/** Stores an int under {@code key}, ignoring any JSONException. */
static public void safePut(JSONObject obj, String key, int value) {
    try {
        obj.put(key, value);
    } catch (JSONException ignored) {
        // Deliberately ignored: JSONObject throws checked exceptions too eagerly.
    }
}

/** Stores a long under {@code key}, ignoring any JSONException. */
static public void safePut(JSONObject obj, String key, long value) {
    try {
        obj.put(key, value);
    } catch (JSONException ignored) {
        // Deliberately ignored: JSONObject throws checked exceptions too eagerly.
    }
}

/** Stores a double under {@code key}, ignoring any JSONException. */
static public void safePut(JSONObject obj, String key, double value) {
    try {
        obj.put(key, value);
    } catch (JSONException ignored) {
        // Deliberately ignored: JSONObject throws checked exceptions too eagerly.
    }
}

/** Stores a boolean under {@code key}, ignoring any JSONException. */
static public void safePut(JSONObject obj, String key, boolean value) {
    try {
        obj.put(key, value);
    } catch (JSONException ignored) {
        // Deliberately ignored: JSONObject throws checked exceptions too eagerly.
    }
}

/** Stores a string under {@code key}, ignoring any JSONException. */
static public void safePut(JSONObject obj, String key, String value) {
    try {
        obj.put(key, value);
    } catch (JSONException ignored) {
        // Deliberately ignored: JSONObject throws checked exceptions too eagerly.
    }
}

/** Stores a collection under {@code key}, ignoring any JSONException. */
static public void safePut(JSONObject obj, String key, Collection<?> value) {
    try {
        obj.put(key, value);
    } catch (JSONException ignored) {
        // Deliberately ignored: JSONObject throws checked exceptions too eagerly.
    }
}

/** Stores a map under {@code key}, ignoring any JSONException. */
static public void safePut(JSONObject obj, String key, Map<?, ?> value) {
    try {
        obj.put(key, value);
    } catch (JSONException ignored) {
        // Deliberately ignored: JSONObject throws checked exceptions too eagerly.
    }
}

/** Stores an arbitrary value under {@code key}, ignoring any JSONException. */
static public void safePut(JSONObject obj, String key, Object value) {
    try {
        obj.put(key, value);
    } catch (JSONException ignored) {
        // Deliberately ignored: JSONObject throws checked exceptions too eagerly.
    }
}
static public Object[] toArray(JSONArray a) throws JSONException {
int l = a.length();

View File

@ -0,0 +1,64 @@
package com.google.refine.util;
import java.io.IOException;
import java.io.InputStream;
/**
 * Pass-through {@link InputStream} that counts how many bytes have been
 * consumed (read or skipped) from the wrapped stream.
 * <p>
 * The counter is not rewound by {@link #reset()}, so bytes re-read after a reset
 * are counted again. Instances are not synchronized.
 */
public class TrackingInputStream extends InputStream {
    final private InputStream is;
    protected long bytesRead;

    /**
     * @param is the stream to read from and count
     */
    public TrackingInputStream(InputStream is) {
        this.is = is;
    }

    /** Returns the number of bytes read or skipped so far. */
    public long getBytesRead() {
        return bytesRead;
    }

    @Override
    public int read() throws IOException {
        // The single-byte read returns the byte VALUE (0..255) or -1 at end of
        // stream, not a count: any non-negative result means one byte consumed.
        int value = is.read();
        if (value >= 0) {
            track(1);
        }
        return value;
    }

    @Override
    public int read(byte[] b) throws IOException {
        return (int) track(is.read(b));
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        return (int) track(is.read(b, off, len));
    }

    @Override
    public long skip(long n) throws IOException {
        return track(is.skip(n));
    }

    @Override
    public void mark(int readlimit) {
        is.mark(readlimit);
    }

    @Override
    public void reset() throws IOException {
        is.reset();
    }

    @Override
    public boolean markSupported() {
        return is.markSupported();
    }

    @Override
    public void close() throws IOException {
        is.close();
    }

    /**
     * Adds a byte count to the running total and returns it unchanged, so calls
     * can be wrapped around a read. Non-positive counts (end of stream, nothing
     * read) leave the total untouched.
     */
    protected long track(long bytesRead) {
        if (bytesRead > 0) {
            this.bytesRead += bytesRead;
        }
        return bytesRead;
    }
}

View File

@ -33,6 +33,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.tests;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
@ -41,6 +48,7 @@ import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities;
public class RefineTest {
@ -82,4 +90,41 @@ public class RefineTest {
logger.info(sb.toString());
}
}
//----helpers----
/**
 * Stubs {@code options} so that it reports having {@code name} and so that the
 * JSONUtilities boolean lookup for it yields {@code def}.
 */
static public void whenGetBooleanOption(String name, JSONObject options, Boolean def){
when(options.has(name)).thenReturn(true);
// NOTE(review): JSONUtilities.getBoolean is a static helper rather than a mock;
// presumably the stubbing binds to the call it makes on the mocked options - confirm.
when(JSONUtilities.getBoolean(options, name, def)).thenReturn(def);
}
/**
 * Stubs {@code options} so that it reports having {@code name} and so that the
 * JSONUtilities int lookup for it yields {@code def}.
 */
static public void whenGetIntegerOption(String name, JSONObject options, int def){
when(options.has(name)).thenReturn(true);
// NOTE(review): JSONUtilities.getInt is a static helper rather than a mock;
// presumably the stubbing binds to the call it makes on the mocked options - confirm.
when(JSONUtilities.getInt(options, name, def)).thenReturn(def);
}
/**
 * Stubs {@code options} so that it reports having {@code name} and so that the
 * JSONUtilities string lookup for it yields {@code def}.
 */
static public void whenGetStringOption(String name, JSONObject options, String def){
when(options.has(name)).thenReturn(true);
// NOTE(review): JSONUtilities.getString is a static helper rather than a mock;
// presumably the stubbing binds to the call it makes on the mocked options - confirm.
when(JSONUtilities.getString(options, name, def)).thenReturn(def);
}
/**
 * Stubs a mocked options object so that a nested-object option appears to be present.
 * <p>
 * First makes {@code options.has(name)} answer {@code true}, then applies a Mockito
 * stub around {@code JSONUtilities.getObject(options, name)} that returns {@code def}.
 * Unlike the scalar helpers, no default is passed to the lookup itself.
 * NOTE(review): the second stub presumably relies on {@code JSONUtilities.getObject}
 * invoking a method on the mock as its last interaction -- confirm against JSONUtilities.
 *
 * @param name    option key
 * @param options expected to be a Mockito mock, since it is passed to {@code when(...)}
 * @param def     object the stubbed lookup returns
 */
static public void whenGetObjectOption(String name, JSONObject options, JSONObject def){
when(options.has(name)).thenReturn(true);
when(JSONUtilities.getObject(options, name)).thenReturn(def);
}
/**
 * Stubs a mocked options object so that an array option appears to be present.
 * <p>
 * First makes {@code options.has(name)} answer {@code true}, then applies a Mockito
 * stub around {@code JSONUtilities.getArray(options, name)} that returns {@code def}.
 * Unlike the scalar helpers, no default is passed to the lookup itself.
 * NOTE(review): the second stub presumably relies on {@code JSONUtilities.getArray}
 * invoking a method on the mock as its last interaction -- confirm against JSONUtilities.
 *
 * @param name    option key
 * @param options expected to be a Mockito mock, since it is passed to {@code when(...)}
 * @param def     array the stubbed lookup returns
 */
static public void whenGetArrayOption(String name, JSONObject options, JSONArray def){
when(options.has(name)).thenReturn(true);
when(JSONUtilities.getArray(options, name)).thenReturn(def);
}
/**
 * Verifies that an option was looked up on the mocked options object: exactly one
 * {@code has(name)} call and exactly one {@code get(name)} call.
 *
 * @param name    option key whose lookups are verified
 * @param options expected to be a Mockito mock, since it is passed to {@code verify(...)}
 */
static public void verifyGetOption(String name, JSONObject options){
    verify(options, times(1)).has(name);
    try {
        verify(options, times(1)).get(name);
    } catch (JSONException e) {
        // get() declares JSONException, so the compiler forces a handler here.
        // If it ever fires, fail the test with the cause instead of printing a
        // stack trace and letting the test continue as if verification succeeded.
        Assert.fail("Unexpected JSONException while verifying get(\"" + name + "\")", e);
    }
}
}

View File

@ -1,14 +1,12 @@
package com.google.refine.tests.importers;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.io.StringReader;
import java.util.Properties;
import org.json.JSONArray;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
@ -16,13 +14,10 @@ import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.FixedWidthImporter;
import com.google.refine.importers.ImportException;
import com.google.refine.model.Project;
import com.google.refine.tests.RefineTest;
import com.google.refine.util.JSONUtilities;
public class FixedWidthImporterTests extends RefineTest {
public class FixedWidthImporterTests extends ImporterTest {
@BeforeTest
public void init() {
logger = LoggerFactory.getLogger(this.getClass());
@ -30,45 +25,20 @@ public class FixedWidthImporterTests extends RefineTest {
//constants
String SAMPLE_ROW = "NDB_NoShrt_DescWater";
String SAMPLE_ROW_WIDTHS = "6,9,5";
//System Under Test
FixedWidthImporter SUT = null;
//mock dependencies
Project project = null;
Properties properties = null;
@BeforeMethod
public void SetUp(){
super.SetUp();
SUT = new FixedWidthImporter();
project = new Project(); //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
properties = mock(Properties.class);
}
@AfterMethod
public void TearDown(){
SUT = null;
project = null;
properties = null;
}
//TODO a lot of these tests are very similar to the TsvCsvImporterTests. It might be possible to overlap them
@Test
public void canParseSeparator(){
int[] i = null;
try {
i = SUT.getColumnWidthsFromString("1,2,3");
} catch (ImportException e) {
Assert.fail(e.getMessage());
}
Assert.assertNotNull(i);
Assert.assertEquals(i[0], 1);
Assert.assertEquals(i[1], 2);
Assert.assertEquals(i[2], 3);
super.TearDown();
}
//---------------------read tests------------------------
@ -76,19 +46,23 @@ public class FixedWidthImporterTests extends RefineTest {
public void readFixedWidth(){
StringReader reader = new StringReader(SAMPLE_ROW + "\nTooShort");
when(properties.getProperty("fixed-column-widths")).thenReturn(SAMPLE_ROW_WIDTHS);
whenGetIntegerOption("ignore",properties,0);
whenGetIntegerOption("header-lines",properties,0);
whenGetIntegerOption("limit",properties,-1);
whenGetIntegerOption("skip",properties,0);
JSONArray columnWidths = new JSONArray();
JSONUtilities.append(columnWidths, 6);
JSONUtilities.append(columnWidths, 9);
JSONUtilities.append(columnWidths, 5);
whenGetArrayOption("columnWidths", options, columnWidths);
whenGetIntegerOption("ignoreLines", options, 0);
whenGetIntegerOption("headerLines", options, 0);
whenGetIntegerOption("skipDataLines", options, 0);
whenGetIntegerOption("limit", options, -1);
try {
SUT.read(reader, project, new ProjectMetadata(), properties);
parseOneFile(SUT, reader);
} catch (Exception e) {
Assert.fail(e.getMessage());
}
Assert.assertEquals(project.rows.size(), 2);
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "NDB_No");
@ -99,27 +73,10 @@ public class FixedWidthImporterTests extends RefineTest {
Assert.assertEquals((String)project.rows.get(1).cells.get(1).value, "rt");
Assert.assertNull(project.rows.get(1).cells.get(2));
verify(properties, times(1)).getProperty("fixed-column-widths");
verifyGetOption("ignore",properties);
verifyGetOption("header-lines",properties);
verifyGetOption("limit",properties);
verifyGetOption("skip",properties);
}
//----helpers----
public void whenGetBooleanOption(String name, Properties properties, Boolean def){
when(properties.containsKey(name)).thenReturn(true);
when(properties.getProperty(name)).thenReturn(Boolean.toString(def));
}
public void whenGetIntegerOption(String name, Properties properties, int def){
when(properties.containsKey(name)).thenReturn(true);
when(properties.getProperty(name)).thenReturn(Integer.toString(def));
}
public void verifyGetOption(String name, Properties properties){
verify(properties, times(1)).containsKey(name);
verify(properties, times(1)).getProperty(name);
JSONUtilities.getIntArray(verify(options, times(1)), "columnWidths");
verifyGetOption("ignore", options);
verifyGetOption("header-lines", options);
verifyGetOption("limit", options);
verifyGetOption("skip", options);
}
}

View File

@ -0,0 +1,109 @@
package com.google.refine.tests.importers;
import static org.mockito.Mockito.mock;
import java.io.InputStream;
import java.io.Reader;
import java.util.ArrayList;
import org.json.JSONObject;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.ImportingParserBase;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.TreeImportingParserBase;
import com.google.refine.importers.tree.XmlImportUtilities;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingManager;
import com.google.refine.model.Project;
import com.google.refine.tests.RefineTest;
/**
 * Shared fixture for importer tests.
 * <p>
 * Holds the project, metadata, importing job and (mocked) options that each test
 * hands to the parser under test, plus {@code parseOneFile} helpers that invoke a
 * parser with those fixtures. {@link #SetUp()} and {@link #TearDown()} carry no test
 * annotations themselves; subclasses call them from their own set-up and tear-down methods.
 */
abstract class ImporterTest extends RefineTest {
    /** Source label handed to every parser invocation made by the helpers below. */
    private static final String FILE_SOURCE = "file-source";

    //mock dependencies
    protected Project project;
    protected ProjectMetadata metadata;
    protected ImportingJob job;
    protected JSONObject options;

    /**
     * Creates fresh fixtures: a real project and metadata, a job obtained from
     * {@code ImportingManager}, and a Mockito mock for the options object.
     */
    public void SetUp(){
        //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
        this.project = new Project();
        this.metadata = new ProjectMetadata();
        this.job = ImportingManager.createJob();
        this.options = mock(JSONObject.class);
    }

    /**
     * Drops all fixtures and disposes the job that {@link #SetUp()} created.
     */
    public void TearDown(){
        this.project = null;
        this.metadata = null;

        // Dispose the job by id before letting go of the reference.
        ImportingManager.disposeJob(this.job.id);
        this.job = null;

        this.options = null;
    }

    /**
     * Runs a flat parser over character input using the shared fixtures, then
     * updates the project.
     * NOTE(review): the exception list handed to the parser is not retained, so
     * anything the parser records there is presumably invisible to the test -- confirm.
     */
    protected void parseOneFile(ImportingParserBase parser, Reader reader) {
        ArrayList<Exception> errors = new ArrayList<Exception>();
        parser.parseOneFile(project, metadata, job, FILE_SOURCE, reader, -1, options, errors);
        project.update();
    }

    /**
     * Runs a flat parser over byte input using the shared fixtures, then
     * updates the project. The exception list handed to the parser is not retained.
     */
    protected void parseOneFile(ImportingParserBase parser, InputStream inputStream) {
        ArrayList<Exception> errors = new ArrayList<Exception>();
        parser.parseOneFile(project, metadata, job, FILE_SOURCE, inputStream, -1, options, errors);
        project.update();
    }

    /**
     * Runs a tree parser over character input with a fresh root column group, creates
     * the project's columns from that group, then updates the column model.
     * The exception list handed to the parser is not retained.
     */
    protected void parseOneFile(TreeImportingParserBase parser, Reader reader) {
        ImportColumnGroup root = new ImportColumnGroup();
        ArrayList<Exception> errors = new ArrayList<Exception>();
        parser.parseOneFile(project, metadata, job, FILE_SOURCE, reader, root, -1, options, errors);
        XmlImportUtilities.createColumnsFromImport(project, root);
        project.columnModel.update();
    }

    /**
     * Runs a tree parser over byte input with a fresh root column group, creates
     * the project's columns from that group, then updates the column model.
     * The exception list handed to the parser is not retained.
     */
    protected void parseOneFile(TreeImportingParserBase parser, InputStream inputStream) {
        ImportColumnGroup root = new ImportColumnGroup();
        ArrayList<Exception> errors = new ArrayList<Exception>();
        parser.parseOneFile(project, metadata, job, FILE_SOURCE, inputStream, root, -1, options, errors);
        XmlImportUtilities.createColumnsFromImport(project, root);
        project.columnModel.update();
    }
}

View File

@ -33,12 +33,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.tests.importers;
import static org.mockito.Mockito.mock;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.Properties;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
@ -47,15 +45,12 @@ import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.JsonImporter;
import com.google.refine.importers.parsers.JSONParser;
import com.google.refine.importers.parsers.TreeParserToken;
import com.google.refine.model.Project;
import com.google.refine.importers.JsonImporter.JSONTreeReader;
import com.google.refine.importers.tree.TreeReader.Token;
import com.google.refine.model.Row;
import com.google.refine.tests.RefineTest;
public class JsonImporterTests extends RefineTest {
public class JsonImporterTests extends ImporterTest {
@BeforeTest
public void init() {
logger = LoggerFactory.getLogger(this.getClass());
@ -63,29 +58,30 @@ public class JsonImporterTests extends RefineTest {
//dependencies
Project project = null;
Properties options = null;
ByteArrayInputStream inputStream = null;
//System Under Test
JsonImporter SUT = null;
@BeforeMethod
public void SetUp(){
super.SetUp();
SUT = new JsonImporter();
project = new Project();
options = mock(Properties.class);
}
@AfterMethod
public void TearDown() throws IOException{
public void TearDown() {
SUT = null;
project = null;
options = null;
if (inputStream != null) inputStream.close();
if (inputStream != null) {
try {
inputStream.close();
} catch (IOException e) {
// Ignore
}
inputStream = null;
}
super.TearDown();
}
@Test
public void canParseSample(){
@ -181,8 +177,8 @@ public class JsonImporterTests extends RefineTest {
String sampleJson2 = "{\"field\":{}}";
String sampleJson3 = "{\"field\":[{},{}]}";
JSONParser parser = new JSONParser(new ByteArrayInputStream( sampleJson.getBytes( "UTF-8" ) ));
TreeParserToken token = TreeParserToken.Ignorable;
JSONTreeReader parser = new JSONTreeReader(new StringReader(sampleJson));
Token token = Token.Ignorable;
int i = 0;
try{
while(token != null){
@ -191,8 +187,8 @@ public class JsonImporterTests extends RefineTest {
break;
i++;
if(i == 3){
Assert.assertEquals(TreeParserToken.Value, token);
Assert.assertEquals("field", parser.getLocalName());
Assert.assertEquals(Token.Value, token);
Assert.assertEquals("field", parser.getFieldName());
}
}
}catch(Exception e){
@ -200,8 +196,8 @@ public class JsonImporterTests extends RefineTest {
}
parser = new JSONParser(new ByteArrayInputStream( sampleJson2.getBytes( "UTF-8" ) ) );
token = TreeParserToken.Ignorable;
parser = new JSONTreeReader(new StringReader(sampleJson2));
token = Token.Ignorable;
i = 0;
try{
while(token != null){
@ -210,16 +206,16 @@ public class JsonImporterTests extends RefineTest {
break;
i++;
if(i == 3){
Assert.assertEquals(TreeParserToken.StartEntity, token);
Assert.assertEquals(parser.getLocalName(), "field");
Assert.assertEquals(Token.StartEntity, token);
Assert.assertEquals(parser.getFieldName(), "field");
}
}
}catch(Exception e){
//silent
}
parser = new JSONParser(new ByteArrayInputStream( sampleJson3.getBytes( "UTF-8" ) ) );
token = TreeParserToken.Ignorable;
parser = new JSONTreeReader(new StringReader(sampleJson3));
token = Token.Ignorable;
i = 0;
try{
while(token != null){
@ -228,16 +224,16 @@ public class JsonImporterTests extends RefineTest {
break;
i++;
if(i == 3){
Assert.assertEquals(token, TreeParserToken.StartEntity);
Assert.assertEquals(parser.getLocalName(), "field");
Assert.assertEquals(token, Token.StartEntity);
Assert.assertEquals(parser.getFieldName(), "field");
}
if(i == 4){
Assert.assertEquals(token, TreeParserToken.StartEntity);
Assert.assertEquals(parser.getLocalName(), "__anonymous__");
Assert.assertEquals(token, Token.StartEntity);
Assert.assertEquals(parser.getFieldName(), "__anonymous__");
}
if(i == 6){
Assert.assertEquals(token, TreeParserToken.StartEntity);
Assert.assertEquals(parser.getLocalName(), "__anonymous__");
Assert.assertEquals(token, Token.StartEntity);
Assert.assertEquals(parser.getFieldName(), "__anonymous__");
}
}
}catch(Exception e){
@ -352,7 +348,7 @@ public class JsonImporterTests extends RefineTest {
}
try {
SUT.read(inputStream, project, new ProjectMetadata(), options);
parseOneFile(SUT, inputStream);
} catch (Exception e) {
Assert.fail();
}

View File

@ -34,7 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.tests.importers;
import java.io.StringReader;
import java.util.Properties;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
@ -42,13 +41,10 @@ import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.RdfTripleImporter;
import com.google.refine.model.Project;
import com.google.refine.tests.RefineTest;
import com.google.refine.util.JSONUtilities;
public class RdfTripleImporterTests extends RefineTest {
public class RdfTripleImporterTests extends ImporterTest {
@BeforeTest
public void init() {
@ -58,15 +54,12 @@ public class RdfTripleImporterTests extends RefineTest {
//System Under Test
RdfTripleImporter SUT = null;
Project project = null;
Properties options = null;
@BeforeMethod
public void SetUp(){
super.SetUp();
SUT = new RdfTripleImporter();
project = new Project();
options = new Properties();
options.put("base-url", "http://rdf.freebase.com");
JSONUtilities.safePut(options, "base-url", "http://rdf.freebase.com");
}
@Test(enabled=false)
@ -75,8 +68,7 @@ public class RdfTripleImporterTests extends RefineTest {
StringReader reader = new StringReader(sampleRdf);
try {
SUT.read(reader, project, new ProjectMetadata(), options);
project.update();
parseOneFile(SUT, reader);
} catch (Exception e) {
Assert.fail();
}
@ -98,8 +90,7 @@ public class RdfTripleImporterTests extends RefineTest {
StringReader reader = new StringReader(sampleRdf);
try {
SUT.read(reader, project, new ProjectMetadata(), options);
project.update();
parseOneFile(SUT, reader);
} catch (Exception e) {
Assert.fail();
}
@ -140,8 +131,7 @@ public class RdfTripleImporterTests extends RefineTest {
StringReader reader = new StringReader(sampleRdf);
try {
SUT.read(reader, project, new ProjectMetadata(), options);
project.update();
parseOneFile(SUT, reader);
} catch (Exception e) {
Assert.fail();
}
@ -175,8 +165,7 @@ public class RdfTripleImporterTests extends RefineTest {
StringReader reader = new StringReader(sampleRdf);
try {
SUT.read(reader, project, new ProjectMetadata(), options);
project.update();
parseOneFile(SUT, reader);
} catch (Exception e) {
Assert.fail();
}

View File

@ -33,15 +33,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.tests.importers;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.StringReader;
import java.util.Properties;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
@ -51,12 +47,10 @@ import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TsvCsvImporter;
import com.google.refine.model.Project;
import com.google.refine.tests.RefineTest;
import com.google.refine.importers.SeparatorBasedImporter;
import com.google.refine.util.JSONUtilities;
public class TsvCsvImporterTests extends RefineTest {
public class TsvCsvImporterTests extends ImporterTest {
@BeforeTest
public void init() {
@ -67,25 +61,18 @@ public class TsvCsvImporterTests extends RefineTest {
String SAMPLE_ROW = "NDB_No,Shrt_Desc,Water";
//System Under Test
TsvCsvImporter SUT = null;
//mock dependencies
Project project = null;
Properties properties = null;
SeparatorBasedImporter SUT = null;
@BeforeMethod
public void SetUp() {
SUT = new TsvCsvImporter();
project = new Project(); //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
properties = mock(Properties.class);
super.SetUp();
SUT = new SeparatorBasedImporter();
}
@AfterMethod
public void TearDown(){
SUT = null;
project = null;
properties = null;
super.TearDown();
}
@Test(dataProvider = "CSV-TSV-AutoDetermine")
@ -94,11 +81,10 @@ public class TsvCsvImporterTests extends RefineTest {
String inputSeparator = sep == "\t" ? "\t" : ",";
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
@ -113,11 +99,10 @@ public class TsvCsvImporterTests extends RefineTest {
String inputSeparator = sep == "\t" ? "\t" : ",";
String input = "value1" + inputSeparator + "value2" + inputSeparator + "value3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, false, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 0, false, false, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 1);
@ -135,10 +120,10 @@ public class TsvCsvImporterTests extends RefineTest {
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
@ -160,13 +145,12 @@ public class TsvCsvImporterTests extends RefineTest {
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
"data1" + inputSeparator + "234" + inputSeparator + "data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 1, true, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 1, true, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
@ -185,13 +169,12 @@ public class TsvCsvImporterTests extends RefineTest {
String inputSeparator = sep == "\t" ? "\t" : ",";
String input = "data1" + inputSeparator + "data2" + inputSeparator + "data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 0, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Column");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "Column2");
@ -209,13 +192,12 @@ public class TsvCsvImporterTests extends RefineTest {
String inputSeparator = sep == "\t" ? "\t" : ",";
String input = " data1 " + inputSeparator + " 3.4 " + inputSeparator + " data3 ";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 0, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.rows.size(), 1);
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
@ -230,13 +212,12 @@ public class TsvCsvImporterTests extends RefineTest {
String inputSeparator = sep == "\t" ? "\t" : ",";
String input = " data1" + inputSeparator + " 12" + inputSeparator + " data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 0, true, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.rows.size(), 1);
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
@ -251,13 +232,12 @@ public class TsvCsvImporterTests extends RefineTest {
String inputSeparator = sep == "\t" ? "\t" : ",";
String input = " data1" + inputSeparator + inputSeparator + " data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 0, true, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.rows.size(), 1);
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
@ -274,13 +254,12 @@ public class TsvCsvImporterTests extends RefineTest {
"sub1" + inputSeparator + "sub2" + inputSeparator + "sub3\n" +
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 2, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 2, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2");
@ -299,13 +278,12 @@ public class TsvCsvImporterTests extends RefineTest {
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
"data1" + inputSeparator + "data2" + inputSeparator + "data3" + inputSeparator + "data4" + inputSeparator + "data5" + inputSeparator + "data6";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 6);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
@ -330,13 +308,12 @@ public class TsvCsvImporterTests extends RefineTest {
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
"\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\"" + inputSeparator + "data2";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
@ -355,13 +332,12 @@ public class TsvCsvImporterTests extends RefineTest {
"col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 1, 1, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 1, 1, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
@ -381,13 +357,12 @@ public class TsvCsvImporterTests extends RefineTest {
"skip1\n" +
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 1, 0, 1, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 1, 0, 1, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
@ -411,13 +386,12 @@ public class TsvCsvImporterTests extends RefineTest {
"skip1\n" +
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 1, 3, 2, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 1, 3, 2, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2");
@ -444,10 +418,10 @@ public class TsvCsvImporterTests extends RefineTest {
"data-row2-cell1" + inputSeparator + "data-row2-cell2" + inputSeparator + "\n" + //missing last data point of this row on purpose
"data-row3-cell1" + inputSeparator + "data-row3-cell2" + inputSeparator + "data-row1-cell3";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, 2, 2, 3, 2, false, true, false);
} catch (IOException e) {
prepareOptions(sep, 2, 2, 3, 2, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
@ -471,13 +445,12 @@ public class TsvCsvImporterTests extends RefineTest {
String inputSeparator = sep == "\t" ? "\t" : ",";
String input = "data1" + inputSeparator + "data2\"" + inputSeparator + "data3" + inputSeparator + "data4";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, true);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 0, false, true, true);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 4);
Assert.assertEquals(project.rows.size(), 1);
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
@ -493,13 +466,12 @@ public class TsvCsvImporterTests extends RefineTest {
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
"\"\"\"To\n Be\"\" is often followed by \"\"or not To\n Be\"\"\"" + inputSeparator + "data2";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
@ -517,13 +489,12 @@ public class TsvCsvImporterTests extends RefineTest {
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
"\"A line with many \n\n\n\n\n empty lines\"" + inputSeparator + "data2";
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
try {
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
} catch (IOException e) {
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
parseOneFile(SUT, new StringReader(input));
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.columnModel.columns.size(), 3);
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
@ -539,32 +510,31 @@ public class TsvCsvImporterTests extends RefineTest {
public void readCsvWithProperties() {
StringReader reader = new StringReader(SAMPLE_ROW);
when(properties.getProperty("separator")).thenReturn(",");
whenGetIntegerOption("ignore",properties,0);
whenGetIntegerOption("header-lines",properties,0);
whenGetIntegerOption("limit",properties,-1);
whenGetIntegerOption("skip",properties,0);
whenGetIntegerOption("ignore-quotes",properties,0);
when(JSONUtilities.getString(options, "separator", null)).thenReturn(",");
whenGetIntegerOption("ignore", options, 0);
whenGetIntegerOption("header-lines", options, 0);
whenGetIntegerOption("limit", options, -1);
whenGetIntegerOption("skip", options, 0);
whenGetIntegerOption("ignore-quotes", options, 0);
try {
SUT.read(reader, project, new ProjectMetadata(), properties);
parseOneFile(SUT, reader);
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.rows.size(), 1);
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "NDB_No");
Assert.assertEquals((String)project.rows.get(0).cells.get(1).value, "Shrt_Desc");
Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "Water");
verify(properties, times(1)).getProperty("separator");
verifyGetOption("ignore",properties);
verifyGetOption("header-lines",properties);
verifyGetOption("limit",properties);
verifyGetOption("skip",properties);
verifyGetOption("ignore-quotes",properties);
JSONUtilities.getString(verify(options, times(1)), "separator", null);
verifyGetOption("ignore", options);
verifyGetOption("header-lines", options);
verifyGetOption("limit", options);
verifyGetOption("skip", options);
verifyGetOption("ignore-quotes", options);
}
@Test
@ -572,20 +542,19 @@ public class TsvCsvImporterTests extends RefineTest {
String input = "data1,data2\",data3,data4";
StringReader reader = new StringReader(input);
when(properties.getProperty("separator")).thenReturn(",");
whenGetIntegerOption("ignore",properties,0);
whenGetIntegerOption("header-lines",properties,0);
whenGetIntegerOption("limit",properties,-1);
whenGetIntegerOption("skip",properties,0);
whenGetBooleanOption("ignore-quotes",properties,true);
when(JSONUtilities.getString(options, "separator", null)).thenReturn(",");
whenGetIntegerOption("ignore", options, 0);
whenGetIntegerOption("header-lines", options, 0);
whenGetIntegerOption("limit", options, -1);
whenGetIntegerOption("skip", options, 0);
whenGetBooleanOption("ignore-quotes", options, true);
try {
SUT.read(reader, project, new ProjectMetadata(), properties);
parseOneFile(SUT, reader);
} catch (Exception e) {
Assert.fail();
}
Assert.assertEquals(project.rows.size(), 1);
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "data1");
@ -593,12 +562,12 @@ public class TsvCsvImporterTests extends RefineTest {
Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "data3");
Assert.assertEquals((String)project.rows.get(0).cells.get(3).value, "data4");
verify(properties, times(1)).getProperty("separator");
verifyGetOption("ignore",properties);
verifyGetOption("header-lines",properties);
verifyGetOption("limit",properties);
verifyGetOption("skip",properties);
verifyGetOption("ignore-quotes",properties);
JSONUtilities.getString(verify(options, times(1)), "separator", null);
verifyGetOption("ignore", options);
verifyGetOption("header-lines", options);
verifyGetOption("limit", options);
verifyGetOption("skip", options);
verifyGetOption("ignore-quotes", options);
}
//--helpers--
@ -612,19 +581,16 @@ public class TsvCsvImporterTests extends RefineTest {
};
}
public void whenGetBooleanOption(String name, Properties properties, Boolean def){
when(properties.containsKey(name)).thenReturn(true);
when(properties.getProperty(name)).thenReturn(Boolean.toString(def));
/**
 * Populates the shared {@code options} object with the parser settings for a test case.
 *
 * <p>Each argument is stored under the option key the importer reads:
 * {@code sep} as "separator", {@code limit} as "limit", {@code skip} as "skipDataLines",
 * {@code ignoreLines} as "ignoreLines", {@code headerLines} as "headerLines",
 * {@code guessValueType} as "guessCellValueTypes", {@code splitIntoColumns} as
 * "splitIntoColumns", and the negation of {@code ignoreQuotes} as "processQuotes".
 *
 * @param sep              column separator
 * @param limit            value stored under "limit"
 * @param skip             value stored under "skipDataLines"
 * @param ignoreLines      value stored under "ignoreLines"
 * @param headerLines      value stored under "headerLines"
 * @param guessValueType   value stored under "guessCellValueTypes"
 * @param splitIntoColumns value stored under "splitIntoColumns"
 * @param ignoreQuotes     inverted before being stored under "processQuotes"
 */
private void prepareOptions(
        String sep, int limit, int skip, int ignoreLines,
        int headerLines, boolean guessValueType, boolean splitIntoColumns, boolean ignoreQuotes) {
    // The option is stored in its positive form, so flip the caller's flag once up front.
    final boolean processQuotes = !ignoreQuotes;

    // Separator and line-count settings.
    JSONUtilities.safePut(options, "separator", sep);
    JSONUtilities.safePut(options, "limit", limit);
    JSONUtilities.safePut(options, "skipDataLines", skip);
    JSONUtilities.safePut(options, "ignoreLines", ignoreLines);
    JSONUtilities.safePut(options, "headerLines", headerLines);

    // Boolean switches.
    JSONUtilities.safePut(options, "guessCellValueTypes", guessValueType);
    JSONUtilities.safePut(options, "splitIntoColumns", splitIntoColumns);
    JSONUtilities.safePut(options, "processQuotes", processQuotes);
}
/**
 * Stubs the mocked {@code properties} so that the option {@code name} appears to be present
 * and resolves to the decimal string form of {@code def}.
 *
 * @param name       option key to stub
 * @param properties mocked properties object receiving the stubs
 * @param def        integer value the option should report
 */
public void whenGetIntegerOption(String name, Properties properties, int def){
    // Properties hands values back as text, so the stub returns the number rendered as a string.
    final String optionText = String.valueOf(def);

    when(properties.containsKey(name)).thenReturn(true);
    when(properties.getProperty(name)).thenReturn(optionText);
}
/**
 * Verifies that the option {@code name} was looked up exactly once on the mocked
 * {@code properties}: one presence check and one value read.
 *
 * @param name       option key expected to have been queried
 * @param properties mocked properties object to verify against
 */
public void verifyGetOption(String name, Properties properties){
    // The single-argument verify(...) checks for exactly one invocation, i.e. times(1).
    verify(properties).containsKey(name);
    verify(properties).getProperty(name);
}
}

View File

@ -35,27 +35,27 @@ package com.google.refine.tests.importers;
import java.util.List;
import javax.servlet.ServletException;
import com.google.refine.importers.XmlImportUtilities;
import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.ImportRecord;
import com.google.refine.importers.tree.TreeReader;
import com.google.refine.importers.tree.XmlImportUtilities;
import com.google.refine.model.Project;
public class XmlImportUtilitiesStub extends XmlImportUtilities {
public List<String> detectRecordElementWrapper(TreeParser parser, String tag) throws ServletException{
public List<String> detectRecordElementWrapper(TreeReader parser, String tag) throws Exception{
return super.detectRecordElement(parser, tag);
}
public void ProcessSubRecordWrapper(Project project, TreeParser parser, ImportColumnGroup columnGroup, ImportRecord record) throws ServletException{
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, ImportRecord record) throws Exception{
super.processSubRecord(project, parser, columnGroup, record);
}
public void findRecordWrapper(Project project, TreeParser parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws ServletException{
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup);
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws Exception{
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1);
}
public void processRecordWrapper(Project project, TreeParser parser, ImportColumnGroup rootColumnGroup) throws ServletException{
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup) throws Exception{
super.processRecord(project, parser, rootColumnGroup);
}

View File

@ -35,11 +35,12 @@ package com.google.refine.tests.importers;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import javax.servlet.ServletException;
import javax.xml.stream.XMLStreamException;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
@ -48,13 +49,12 @@ import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.importers.TreeImportUtilities.ImportColumn;
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
import com.google.refine.importers.TreeImportUtilities.ImportRecord;
import com.google.refine.importers.parsers.JSONParser;
import com.google.refine.importers.parsers.TreeParser;
import com.google.refine.importers.parsers.TreeParserToken;
import com.google.refine.importers.parsers.XmlParser;
import com.google.refine.importers.JsonImporter.JSONTreeReader;
import com.google.refine.importers.XmlImporter.XmlParser;
import com.google.refine.importers.tree.ImportColumn;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.ImportRecord;
import com.google.refine.importers.tree.TreeReader;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.tests.RefineTest;
@ -69,7 +69,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
//dependencies
Project project;
TreeParser parser;
TreeReader parser;
ImportColumnGroup columnGroup;
ImportRecord record;
ByteArrayInputStream inputStream;
@ -134,7 +134,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
List<String> response = new ArrayList<String>();
try {
response = SUT.detectRecordElementWrapper(parser, tag);
} catch (ServletException e) {
} catch (Exception e) {
Assert.fail(e.getMessage());
}
Assert.assertNotNull(response);
@ -152,7 +152,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
List<String> response = new ArrayList<String>();
try {
response = SUT.detectRecordElementWrapper(parser, tag);
} catch (ServletException e) {
} catch (Exception e) {
Assert.fail(e.getMessage());
}
Assert.assertNotNull(response);
@ -171,7 +171,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
List<String> response = new ArrayList<String>();
try {
response = SUT.detectRecordElementWrapper(parser, tag);
} catch (ServletException e) {
} catch (Exception e) {
Assert.fail(e.getMessage());
}
Assert.assertNull(response);
@ -181,7 +181,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
public void detectRecordElementRegressionXmlTest(){
loadSampleXml();
String[] path = XmlImportUtilitiesStub.detectRecordElement(new XmlParser(inputStream));
String[] path = XmlImportUtilitiesStub.detectRecordElement(createXmlParser());
Assert.assertNotNull(path);
Assert.assertEquals(path.length, 2);
Assert.assertEquals(path[0], "library");
@ -192,7 +192,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
public void detectRecordElementRegressionJsonTest(){
loadSampleJson();
String[] path = XmlImportUtilitiesStub.detectRecordElement(new JSONParser(inputStream));
String[] path = XmlImportUtilitiesStub.detectRecordElement(
new JSONTreeReader(new InputStreamReader(inputStream)));
Assert.assertNotNull(path);
Assert.assertEquals(path.length, 2);
Assert.assertEquals(path[0], "__anonymous__");
@ -204,7 +205,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadSampleXml();
String[] recordPath = new String[]{"library","book"};
XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup );
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1);
log(project);
assertProjectCreated(project, 0, 6);
@ -224,7 +225,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
loadData(XmlImporterTests.getSampleWithVaryingStructure());
String[] recordPath = new String[]{"library", "book"};
XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup);
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1);
log(project);
assertProjectCreated(project, 0, 6);
@ -278,7 +279,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try {
SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup);
} catch (ServletException e) {
} catch (Exception e) {
Assert.fail();
}
@ -297,7 +298,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try {
SUT.processRecordWrapper(project, parser, columnGroup);
} catch (ServletException e) {
} catch (Exception e) {
Assert.fail();
}
log(project);
@ -318,7 +319,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try {
SUT.processRecordWrapper(project, parser, columnGroup);
} catch (ServletException e) {
} catch (Exception e) {
Assert.fail();
}
log(project);
@ -343,7 +344,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try {
SUT.processRecordWrapper(project, parser, columnGroup);
} catch (ServletException e) {
} catch (Exception e) {
Assert.fail();
}
log(project);
@ -367,7 +368,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
try {
SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record);
} catch (ServletException e) {
} catch (Exception e) {
Assert.fail();
}
log(project);
@ -429,18 +430,24 @@ public class XmlImportUtilitiesTests extends RefineTest {
public void ParserSkip(){
try {
if(parser.getEventType() == TreeParserToken.Ignorable){
if (parser.current() == TreeReader.Token.Ignorable){
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
}
} catch (ServletException e1) {
} catch (Exception e1) {
Assert.fail();
}
}
public void createXmlParser(){
public TreeReader createXmlParser(){
try {
parser = new XmlParser(inputStream);
}
public void createJsonParser(){
parser = new JSONParser(inputStream);
return parser;
} catch (XMLStreamException e) {
return null;
}
}
/**
 * Builds a JSON tree reader over the current {@code inputStream}, stores it in the shared
 * {@code parser} field, and returns it.
 *
 * @return the newly created reader (the same instance that was assigned to {@code parser})
 */
public TreeReader createJsonParser(){
    // Decode with an explicit charset: the one-argument InputStreamReader constructor falls back
    // to the platform default, which makes the parsed fixture depend on the machine running the test.
    // NOTE(review): assumes the fixture bytes are UTF-8 encoded - confirm against the data-loading helper.
    parser = new JSONTreeReader(
            new InputStreamReader(inputStream, java.nio.charset.Charset.forName("UTF-8")));
    return parser;
}
}

View File

@ -33,12 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.tests.importers;
import static org.mockito.Mockito.mock;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Properties;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
@ -47,14 +44,11 @@ import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.XmlImporter;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.tests.RefineTest;
public class XmlImporterTests extends RefineTest {
public class XmlImporterTests extends ImporterTest {
@BeforeTest
public void init() {
@ -62,29 +56,30 @@ public class XmlImporterTests extends RefineTest {
}
//dependencies
Project project = null;
Properties options = null;
ByteArrayInputStream inputStream = null;
//System Under Test
XmlImporter SUT = null;
@BeforeMethod
public void SetUp(){
super.SetUp();
SUT = new XmlImporter();
project = new Project();
options = mock(Properties.class);
}
@AfterMethod
public void TearDown() throws IOException{
public void TearDown() {
SUT = null;
project = null;
options = null;
if (inputStream != null) inputStream.close();
if (inputStream != null) {
try {
inputStream.close();
} catch (IOException e) {
// Ignore
}
inputStream = null;
}
super.TearDown();
}
@Test
public void canParseSample(){
@ -309,11 +304,9 @@ public class XmlImporterTests extends RefineTest {
}
try {
SUT.read(inputStream, project, new ProjectMetadata(), options);
parseOneFile(SUT, inputStream);
} catch (Exception e) {
Assert.fail();
}
}
}

View File

@ -50,9 +50,10 @@ function registerCommands() {
RS.registerCommand(module, "get-version", new Packages.com.google.refine.commands.GetVersionCommand());
RS.registerCommand(module, "create-import-job", new Packages.com.google.refine.commands.importing.CreateImportJobCommand());
RS.registerCommand(module, "retrieve-import-content", new Packages.com.google.refine.commands.importing.RetrieveImportContentCommand());
RS.registerCommand(module, "get-import-job-status", new Packages.com.google.refine.commands.importing.GetImportJobStatusCommand());
RS.registerCommand(module, "get-importing-configuration", new Packages.com.google.refine.commands.importing.GetImportingConfigurationCommand());
RS.registerCommand(module, "create-importing-job", new Packages.com.google.refine.commands.importing.CreateImportingJobCommand());
RS.registerCommand(module, "get-importing-job-status", new Packages.com.google.refine.commands.importing.GetImportingJobStatusCommand());
RS.registerCommand(module, "importing-controller", new Packages.com.google.refine.commands.importing.ImportingControllerCommand());
RS.registerCommand(module, "create-project-from-upload", new Packages.com.google.refine.commands.project.CreateProjectCommand());
RS.registerCommand(module, "import-project", new Packages.com.google.refine.commands.project.ImportProjectCommand());
@ -120,12 +121,9 @@ function registerCommands() {
RS.registerCommand(module, "get-expression-language-info", new Packages.com.google.refine.commands.expr.GetExpressionLanguageInfoCommand());
RS.registerCommand(module, "get-expression-history", new Packages.com.google.refine.commands.expr.GetExpressionHistoryCommand());
RS.registerCommand(module, "get-starred-expressions", new Packages.com.google.refine.commands.expr.GetStarredExpressionsCommand());
RS.registerCommand(module, "toggle-starred-expression", new Packages.com.google.refine.commands.expr.ToggleStarredExpressionCommand());
RS.registerCommand(module, "log-expression", new Packages.com.google.refine.commands.expr.LogExpressionCommand());
RS.registerCommand(module, "preview-expression", new Packages.com.google.refine.commands.expr.PreviewExpressionCommand());
RS.registerCommand(module, "get-preference", new Packages.com.google.refine.commands.GetPreferenceCommand());
RS.registerCommand(module, "get-all-preferences", new Packages.com.google.refine.commands.GetAllPreferencesCommand());
RS.registerCommand(module, "set-preference", new Packages.com.google.refine.commands.SetPreferenceCommand());
@ -168,11 +166,98 @@ function registerOperations() {
OR.registerOperation(module, "recon-copy-across-columns", Packages.com.google.refine.operations.recon.ReconCopyAcrossColumnsOperation);
}
function registerImportSourceClasses() {
var RM = Packages.com.google.refine.commands.importing.ImportManager;
RM.registerImportSourceClass("file-upload", Packages.com.google.refine.model.meta.FileUploadImportSource);
RM.registerImportSourceClass("text", Packages.com.google.refine.model.meta.TextImportSource);
RM.registerImportSourceClass("web", Packages.com.google.refine.model.meta.WebImportSource);
/*
 * Registers the core module's importing configuration with ImportingManager, in this order:
 * formats (each optionally with a client-side UI class name and a server-side parser),
 * file-extension-to-format mappings, MIME-type-to-format mappings, format guessers,
 * and finally the default importing controller.
 *
 * The extension, MIME-type and guesser registrations refer to formats by the string names
 * registered in the first section, so those names must stay in sync when a format is renamed.
 */
function registerImporting() {
// Shorthand for the Java ImportingManager class, reached through the Packages global.
var IM = Packages.com.google.refine.importing.ImportingManager;
/*
* Formats and their UI class names and parsers:
* - UI class names are used on the client-side in Javascript to instantiate code that lets the user
* configure the parser's options.
* - Parsers are server-side code that do the actual parsing. Because they have access to the raw files,
* they also generate defaults for the client-side UIs to initialize.
*/
IM.registerFormat("text", "Text files"); // generic format, no parser to handle it
IM.registerFormat("text/line-based", "Line-based text files", "LineBasedParserUI",
new Packages.com.google.refine.importers.LineBasedImporter());
IM.registerFormat("text/line-based/*sv", "CSV / TSV / separator-based files", "SeparatorBasedParserUI",
new Packages.com.google.refine.importers.SeparatorBasedImporter());
IM.registerFormat("text/line-based/fixed-width", "Fixed-width field text files", "FixedWidthParserUI",
new Packages.com.google.refine.importers.FixedWidthImporter());
IM.registerFormat("text/xml", "XML files", "XmlParserUI", new Packages.com.google.refine.importers.XmlImporter());
// Both Excel formats (.xlsx here, .xls under "binary/xls" below) share ExcelParserUI and ExcelImporter.
IM.registerFormat("text/xml/xlsx", "Excel (.xlsx) files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfParserUI", new Packages.com.google.refine.importers.RdfTripleImporter());
IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter());
// MARC is registered by name only: no UI class name or parser is passed here.
IM.registerFormat("text/marc", "MARC files");
IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it
IM.registerFormat("binary/xls", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
IM.registerFormat("service", "Services"); // generic format, no parser to handle it
/*
* Extension to format mappings
*/
IM.registerExtension(".txt", "text/line-based");
IM.registerExtension(".csv", "text/line-based/*sv");
IM.registerExtension(".tsv", "text/line-based/*sv");
IM.registerExtension(".xml", "text/xml");
IM.registerExtension(".rdf", "text/xml/rdf");
IM.registerExtension(".json", "text/json");
IM.registerExtension(".js", "text/json");
IM.registerExtension(".xls", "binary/xls");
IM.registerExtension(".xlsx", "text/xml/xlsx");
IM.registerExtension(".marc", "text/marc");
IM.registerExtension(".mrc", "text/marc");
/*
* Mime type to format mappings
*/
IM.registerMimeType("text/plain", "text/line-based");
IM.registerMimeType("text/csv", "text/line-based/*sv");
IM.registerMimeType("text/x-csv", "text/line-based/*sv");
IM.registerMimeType("text/tab-separated-value", "text/line-based/*sv");
IM.registerMimeType("text/fixed-width", "text/line-based/fixed-width");
IM.registerMimeType("application/msexcel", "binary/xls");
IM.registerMimeType("application/x-msexcel", "binary/xls");
IM.registerMimeType("application/x-ms-excel", "binary/xls");
IM.registerMimeType("application/vnd.ms-excel", "binary/xls");
IM.registerMimeType("application/x-excel", "binary/xls");
IM.registerMimeType("application/xls", "binary/xls");
// NOTE(review): every other *xls MIME type above maps to "binary/xls", but this one maps to the
// .xlsx format - confirm whether that is intentional. No XML or .xlsx-specific MIME type is mapped here.
IM.registerMimeType("application/x-xls", "text/xml/xlsx");
IM.registerMimeType("application/json", "text/json");
IM.registerMimeType("text/json", "text/json");
IM.registerMimeType("application/rdf+xml", "text/xml/rdf");
IM.registerMimeType("application/marc", "text/marc");
/*
* Format guessers: these take a format derived from extensions or mime-types,
* look at the actual files' content, and try to guess a better format.
*/
IM.registerFormatGuesser("text", new Packages.com.google.refine.importers.TextFormatGuesser());
IM.registerFormatGuesser("text/line-based", new Packages.com.google.refine.importers.LineBasedFormatGuesser());
/*
* Controllers: these implement high-level UI flows for importing data. For example, the default
* controller lets the user specify one or more source files, either local or remote or on the clipboard,
* lets the user select which files to actually import in case any of the original file is an archive
* containing several files, and then lets the user configure parsing options.
*/
IM.registerController(
module,
"default-importing-controller",
new Packages.com.google.refine.importing.DefaultImportingController()
);
}
/*
@ -183,7 +268,7 @@ function init() {
registerCommands();
registerOperations();
registerImportSourceClasses();
registerImporting();
var RC = Packages.com.google.refine.model.recon.ReconConfig;
RC.registerReconConfig(module, "standard-service", Packages.com.google.refine.model.recon.StandardReconConfig);
@ -193,12 +278,36 @@ function init() {
module,
[
"externals/jquery-1.4.2.min.js",
"externals/jquery.cookie.js",
"externals/jquery.eventstack-0.3.js",
"externals/jquery-ui/jquery-ui-1.8.custom.min.js",
"externals/date.js",
"scripts/util/misc.js",
"scripts/util/url.js",
"scripts/util/string.js",
"scripts/util/ajax.js",
"scripts/util/menu.js",
"scripts/util/dialog.js",
"scripts/util/dom.js",
"scripts/index.js",
"scripts/index/import-sources.js"
"scripts/index/create-project-ui.js",
"scripts/index/open-project-ui.js",
"scripts/index/import-project-ui.js",
"scripts/index/default-importing-controller/controller.js",
"scripts/index/default-importing-controller/file-selection-panel.js",
"scripts/index/default-importing-controller/parsing-panel.js",
"scripts/index/default-importing-sources/sources.js",
"scripts/index/parser-interfaces/preview-table.js",
"scripts/index/parser-interfaces/separator-based-parser-ui.js",
"scripts/index/parser-interfaces/line-based-parser-ui.js",
"scripts/index/parser-interfaces/fixed-width-parser-ui.js",
"scripts/index/parser-interfaces/excel-parser-ui.js",
"scripts/index/parser-interfaces/xml-parser-ui.js",
"scripts/index/parser-interfaces/json-parser-ui.js"
]
);
@ -210,32 +319,20 @@ function init() {
"styles/jquery-ui-overrides.less",
"styles/common.less",
"styles/pure.css",
"styles/index.less"
]
);
"styles/index.less",
"styles/index/create-project-ui.less",
"styles/index/open-project-ui.less",
"styles/index/import-project-ui.less",
ClientSideResourceManager.addPaths(
"import/scripts",
module,
[
"externals/jquery-1.4.2.min.js",
"externals/jquery-ui/jquery-ui-1.8.custom.min.js",
"externals/date.js",
"scripts/util/string.js",
"scripts/util/dom.js",
"scripts/import.js"
]
);
"styles/index/default-importing-controller.less",
"styles/index/default-importing-file-selection-panel.less",
"styles/index/default-importing-parsing-panel.less",
ClientSideResourceManager.addPaths(
"import/styles",
module,
[
"externals/jquery-ui/css/ui-lightness/jquery-ui-1.8.custom.css",
"styles/jquery-ui-overrides.less",
"styles/common.less",
"styles/pure.css",
"styles/import.less"
"styles/index/default-importing-sources.less",
"styles/views/data-table-view.less", // for the preview table's styles
"styles/index/fixed-width-parser-ui.less",
"styles/index/xml-parser-ui.less",
"styles/index/json-parser-ui.less"
]
);

View File

@ -41,182 +41,33 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
$styleInjection
</head>
<body>
#if($params.new && $params.new == "1")
#set($newStyle = "")
#set($oldStyle = "display: none; ")
#else
#set($oldStyle = "")
#set($newStyle = "display: none; ")
#end
<div id="container">
<div id="logo"> </div>
<div id="header-home">
<img alt="Google Refine" src="images/logo-googlerefine-40.png" />
<h1>A power tool for working with messy data.</h1>
<div id="header">
<img alt="Google Refine" src="images/logo-googlerefine-30.png" width="129" height="29" />
A power tool for working with messy data.
</div>
<div id="content-home">
<div id="left-panel" class="main-layout-panel"><div id="left-panel-body">
<ul id="action-area-tabs">
</ul>
<div id="project-links">
<div id="logo-container">
<img alt="Google Refine" src="images/logo-gem-40.png" />
<div id="google-refine-version"></div>
</div>
<ul>
<li><a href="http://code.google.com/p/google-refine/wiki/DocumentationForUsers">Help</a></li>
<li><a href="/about.html">About</a></li>
</ul>
</div>
</div></div> <!-- left-panel -->
<div id="right-panel" class="main-layout-panel"><div id="right-panel-body">
</div></div> <!-- right-panel -->
<div id="no-project-message" class="message" style="display: none;">
No existing project. Create one now!<br/>
Try these <a href="http://code.google.com/p/google-refine/wiki/SampleDatasets" target="_blank">sample data sets &raquo;</a>
</div>
<div id="project-open">
<h1>Open a Project</h1>
<div id="projects-container"></div>
<div class="content-block-footer"><a href="javascript:openWorkspaceDir()" class="secondary">Browse workspace directory</a></div>
</div>
<div id="project-create">
<h1 style="$newStyle">Create a New Project</h1>
<div style="$newStyle" id="import-panel"><table id="import-panel-layout">
<tr>
<td id="import-panel-tab-headers">
<div>Import data from</div>
</td>
<td id="import-panel-tab-bodies"></td>
</tr>
<tr>
<td colspan="2" id="import-panel-message">
<h3>What kinds of data files can I import?</h3>
<div>TSV, CSV, *SV, Excel (.xls and .xlsx), JSON, XML, RDF as XML, and
Google Spreadsheets are all supported. Support for other formats can
be added with Refine extensions.
</div>
</td>
</tr>
</table></div>
<div style="$newStyle" id="import-progress-panel">
<div class="grid-layout layout-normal layout-full"><table>
<tr><td colspan="3" id="import-progress-message"></td></tr>
<tr><td colspan="3">
<div id="import-progress-bar-frame"><div id="import-progress-bar-body"></div></div>
</td></tr>
<tr>
<td id="import-progress-message-left"></td>
<td id="import-progress-message-center"></td>
<td id="import-progress-message-right"></td>
</tr>
<tr><td colspan="3">
<button class="button" id="import-progress-cancel-button">Cancel</button>
</td></tr>
</table></div>
<iframe id="import-iframe" name="import-iframe"></iframe>
</div>
<div style="$newStyle" id="import-error-panel"><div class="grid-layout layout-normal layout-full"><table>
<tr><td id="import-error-message"></td></tr>
<tr><td id="import-error-stack"></td></tr>
<tr><td><button class="button button-primary" id="import-error-ok-button">OK</button></td></tr>
</table></div></div>
<form style="$oldStyle" id="file-upload-form" method="post" enctype="multipart/form-data" action="/command/core/create-project-from-upload" accept-charset="UTF-8">
<h1>Create a New Project</h1>
<h2 id="project-toggle">
<a class="secondary" href="javascript:showHide('file-upload-form', 'project-upload-form')">or Import an Existing Project</a>
</h2>
<div class="project-create-basic">
<table class="form-table">
<tr>
<th><label for="project-file">Data file:</label></th>
<td><input type="file" id="project-file-input" name="project-file" /></td>
</tr>
<tr>
<th><label for="project-url">or data file URL:</label></th>
<td><input type="text" id="project-url-input" name="project-url" size="40" /></td>
</tr>
<tr>
<th><label for="project-name">Project name:</label></th>
<td><input type="text" size="25" id="project-name-input" name="project-name" /></td></tr>
<tr>
<td></td>
<td><input type="submit" value="Create Project" id="upload-file-button" class="button button-primary" /></td>
</tr>
</table>
</div>
<div class="project-create-advanced">
<h2>Advanced Options</h2>
<div class="project-create-option">
Limit load to:
<div class="project-create-suboption">
<input type="text" id="limit-input" name="limit" size="5" /> rows (blank for all)
</div>
</div>
<div class="project-create-option">
Ignore:
<div class="project-create-suboption">
<input type="text" id="ignore-input" name="ignore" size="5" value="0" /> initial non-blank lines
</div>
</div>
<div class="project-create-option">
Skip:
<div class="project-create-suboption">
<input type="text" id="skip-input" name="skip" size="5" value="0" /> initial data rows
</div>
</div>
<div id="project-create-parsetext">
<div class="project-create-option">
When parsing text files:
<div class="project-create-suboption">
<input id="split-into-columns-input" type="checkbox" checked="true" name="split-into-columns" />
Split into columns
</div>
<div class="project-create-suboption">
Column separator:
<input type="text" id="separator-input" name="separator" size="2" /><br />
(leave blank to auto-detect)
</div>
<div class="project-create-suboption">
<input type="checkbox" id="guess-value-type-input" name="guess-value-type" checked="true" />
Auto-detect value types<br />
(numbers, dates, etc)
</div>
<div class="project-create-suboption">
Header lines: <input type="text" id="header-lines-input" name="header-lines" size="5" value="1" /><br />
(use 0 if your data has no header)
</div>
<div class="project-create-suboption">
<input type="checkbox" id="ignore-quotes-input" name="ignore-quotes" />
Ignore quotation marks
</div>
</div>
</div>
</div>
</form>
<form style="display: none;" id="project-upload-form" method="post" enctype="multipart/form-data" action="/command/core/import-project" accept-charset="UTF-8" style="display:none;">
<h1>Import an Existing Project</h1>
<h2 id="project-toggle">
<a class="secondary" href="javascript:showHide('project-upload-form', 'file-upload-form')">or Create a New Project</a>
</h2>
<div class="project-create-basic">
<p>Import an existing Google Refine .tar or .tar.gz project file:</p>
<table class="form-table">
<tr>
<th><label for="project-file">Project file:</label></th>
<td><input type="file" id="project-tar-file-input" name="project-file" /></td>
</tr>
<tr>
<th><label for="project-name">Project name (optional):</label></th>
<td><input type="text" size="25" id="project-name-input" name="project-name" /></td></tr>
<tr>
<td></td>
<td><input type="submit" value="Import Project" id="import-project-button" class="button button-primary" /></td>
</tr>
</table>
</div>
</form>
</div>
<div id="project-links">
<ul>
<li><a href="/about.html">About Google Refine</a></li>
<li><a href="https://code.google.com/p/google-refine/">Project Home Page</a></li>
<li><a href="http://code.google.com/p/google-refine/wiki/Screencasts">Screencasts</a></li>
<li><a href="http://code.google.com/p/google-refine/wiki/DocumentationForUsers">Help Documentation</a></li>
</ul>
<div id="google-refine-version"></div>
</div>
</div>
</div>
</body>
</html>

View File

@ -31,64 +31,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
function onClickUploadFileButton(evt) {
var projectName = $("#project-name-input")[0].value;
var dataURL = $.trim($("#project-url-input")[0].value);
if (! $.trim(projectName).length) {
window.alert("You must specify a project name.");
var GoogleRefineVersion;
} else if ($("#project-file-input")[0].files.length === 0 && ! dataURL.length) {
window.alert("You must specify a data file to upload or a URL to retrieve.");
var Refine = {
actionAreas: []
};
} else {
$("#file-upload-form").attr("action",
"/command/core/create-project-from-upload?" + [
"url=" + escape(dataURL),
"split-into-columns=" + $("#split-into-columns-input")[0].checked,
"separator=" + $("#separator-input")[0].value,
"ignore=" + $("#ignore-input")[0].value,
"header-lines=" + $("#header-lines-input")[0].value,
"skip=" + $("#skip-input")[0].value,
"limit=" + $("#limit-input")[0].value,
"guess-value-type=" + $("#guess-value-type-input")[0].checked,
"ignore-quotes=" + $("#ignore-quotes-input")[0].checked
].join("&"));
Refine.selectActionArea = function(id) {
$('.action-area-tab').removeClass('selected');
$('.action-area-tab-body').css('visibility', 'hidden').css('z-index', '100');
return true;
}
evt.preventDefault();
return false;
}
function formatDate(d) {
var d = new Date(d);
var last_year = Date.today().add({ years: -1 });
var last_month = Date.today().add({ months: -1 });
var last_week = Date.today().add({ days: -7 });
var today = Date.today();
var tomorrow = Date.today().add({ days: 1 });
if (d > today) {
return "today " + d.toString("h:mm tt");
} else if (d.between(last_week, today)) {
var diff = Math.floor(today.getDayOfYear() - d.getDayOfYear());
return (diff <= 1) ? ("yesterday " + d.toString("h:mm tt")) : (diff + " days ago");
} else if (d.between(last_month, today)) {
var diff = Math.floor((today.getDayOfYear() - d.getDayOfYear()) / 7);
if (diff < 0) {diff += 52;}
return (diff == 1) ? "a week ago" : diff.toFixed(0) + " weeks ago" ;
} else if (d.between(last_year, today)) {
var diff = Math.floor(today.getMonth() - d.getMonth());
if (diff < 0) {diff += 12;}
return (diff == 1) ? "a month ago" : diff + " months ago";
} else {
var diff = Math.floor(today.getYear() - d.getYear());
return (diff == 1) ? "a year ago" : diff + " years ago";
for (var i = 0; i < Refine.actionAreas.length; i++) {
var actionArea = Refine.actionAreas[i];
if (id == actionArea.id) {
actionArea.tabElmt.addClass('selected');
actionArea.bodyElmt.css('visibility', 'visible').css('z-index', '110');;
}
}
};
function isThereNewRelease() {
$(function() {
var isThereNewRelease = function() {
var thisRevision = GoogleRefineVersion.revision;
var revision_pattern = /r([0-9]+)/;
@ -103,150 +66,9 @@ function isThereNewRelease() {
var latestRev = parseInt(revision_pattern.exec(GoogleRefineReleases.releases[0].revision)[1],10);
return latestRev > thisRev;
}
/*
 * Requests the metadata of all projects from the server and passes the response
 * to renderProjects.
 */
function fetchProjects() {
  var metadataUrl = "/command/core/get-all-project-metadata";

  // Only the response payload is forwarded; any further callback arguments are dropped.
  var onMetadataLoaded = function(data) {
    renderProjects(data);
  };

  $.getJSON(metadataUrl, null, onMetadataLoaded, "json");
}
/**
 * Renders the project list into #projects-container.
 *
 * Each row shows the project name (linking to the project), a "rename" link,
 * a delete link and the last-modified date. The rename and delete links are
 * only visible while the mouse is over the row.
 *
 * @param data response of get-all-project-metadata; data.projects maps a
 *             project id to its metadata (the code reads .name and .modified)
 */
function renderProjects(data) {
  var projects = [];
  for (var n in data.projects) {
    if (data.projects.hasOwnProperty(n)) {
      var project = data.projects[n];
      project.id = n;
      project.date = Date.parseExact(project.modified, "yyyy-MM-ddTHH:mm:ssZ");
      projects.push(project);
    }
  }

  // Most recently modified project first.
  projects.sort(function(a, b) { return b.date.getTime() - a.date.getTime(); });

  var container = $("#projects-container").empty();
  if (!projects.length) {
    $("#no-project-message").clone().show().appendTo(container);
  } else {
    var table = $(
      '<table class="list-table"><tr>' +
      '<th>Name</th>' +
      '<th></th>' +
      '<th></th>' +
      '<th align="right">Last&nbsp;modified</th>' +
      '</tr></table>'
    ).appendTo(container)[0];

    var renderProject = function(project) {
      var tr = table.insertRow(table.rows.length);
      tr.className = "project";

      var nameLink = $('<a></a>')
        .addClass("list-table-itemname")
        .text(project.name)
        .attr("href", "/project?project=" + project.id)
        .appendTo(tr.insertCell(tr.cells.length));

      var renameLink = $('<a></a>')
        .text("rename")
        .addClass("secondary")
        .attr("href", "javascript:{}")
        .css("visibility", "hidden")
        .click(function() {
          var name = window.prompt("New project name:", project.name);
          if (name == null) {
            return; // prompt was cancelled
          }

          name = $.trim(name);
          if (project.name == name || name.length == 0) {
            return; // nothing to change
          }

          $.ajax({
            type: "POST",
            url: "/command/core/rename-project",
            data: { "project" : project.id, "name" : name },
            dataType: "json",
            success: function (data) {
              if (data && typeof data.code != 'undefined' && data.code == "ok") {
                // Keep the in-memory record in sync with what is displayed, so
                // the next rename prompt and the delete confirmation use the
                // new name rather than the stale one.
                project.name = name;
                nameLink.text(name);
              } else {
                // data itself may be missing here, so do not dereference it blindly.
                var reason = (data && data.message) ? data.message : "unknown error";
                alert("Failed to rename project: " + reason);
              }
            }
          });
        }).appendTo(tr.insertCell(tr.cells.length));

      var deleteLink = $('<a></a>')
        .addClass("delete-project")
        .attr("title","Delete this project")
        .attr("href","")
        .css("visibility", "hidden")
        .html("<img src='/images/close.png' />")
        .click(function() {
          if (window.confirm("Are you sure you want to delete project \"" + project.name + "\"?")) {
            $.ajax({
              type: "POST",
              url: "/command/core/delete-project",
              data: { "project" : project.id },
              dataType: "json",
              success: function (data) {
                if (data && typeof data.code != 'undefined' && data.code == "ok") {
                  // Reload the whole list so the deleted row disappears.
                  fetchProjects();
                }
              }
            });
          }
          return false; // suppress navigation through the empty href
        }).appendTo(tr.insertCell(tr.cells.length));

      $('<div></div>')
        .html(formatDate(project.date))
        .addClass("last-modified")
        .attr("title", project.date.toString())
        .appendTo(tr.insertCell(tr.cells.length));

      $(tr).mouseenter(function() {
        renameLink.css("visibility", "visible");
        deleteLink.css("visibility", "visible");
      }).mouseleave(function() {
        renameLink.css("visibility", "hidden");
        deleteLink.css("visibility", "hidden");
      });
    };

    for (var i = 0; i < projects.length; i++) {
      renderProject(projects[i]);
    }
  }
}
/**
 * Hides the element whose id is toHide and shows the one whose id is toShow.
 */
function showHide(toHide, toShow) {
  var outgoing = $("#" + toHide);
  outgoing.hide();

  var incoming = $("#" + toShow);
  incoming.show();
}
/**
 * POSTs the open-workspace-dir command and alerts the server's message
 * when the response code is not "ok" and a message is present.
 */
function openWorkspaceDir() {
  var onResponse = function(response) {
    var failed = response.code != "ok";
    if (failed && "message" in response) {
      alert(response.message);
    }
  };

  $.ajax({
    type: "POST",
    url: "/command/core/open-workspace-dir",
    dataType: "json",
    success: onResponse
  });
}
var GoogleRefineVersion;
function showVersion() {
var showVersion = function() {
$.getJSON(
"/command/core/get-version",
null,
@ -264,10 +86,10 @@ function showVersion() {
if ("releases" in window) {
if (isThereNewRelease()) {
var container = $('<div id="notification-container">')
.appendTo(document.body);
.appendTo(document.body)
var notification = $('<div id="notification">')
.text('New version! ')
.appendTo(container);
.appendTo(container)
$('<a>')
.addClass('notification-action')
.attr("href", releases.homepage)
@ -281,174 +103,72 @@ function showVersion() {
window.setTimeout(poll, 1000);
}
);
}
function renderImportPanel() {
var headerContainer = $('#import-panel-tab-headers');
var bodyContainer = $('#import-panel-tab-bodies');
var selectImportSourceTab = function(importSource) {
$('.import-panel-tab-body').hide();
$('.import-panel-tab-header').removeClass('selected');
importSource._divBody.show();
importSource._divHeader.addClass('selected');
importSource._ui.focus();
};
var createImportSourceTab = function(importSource) {
importSource._divBody = $('<div>')
.addClass('import-panel-tab-body')
.appendTo(bodyContainer)
.hide();
var resize = function() {
var leftPanelWidth = 150;
// px
var width = $(window).width();
var height = $(window).height();
var headerHeight = $('#header').outerHeight();
var panelHeight = height - headerHeight;
importSource._divHeader = $('<div>')
.addClass('import-panel-tab-header')
.text(importSource.label)
.appendTo(headerContainer)
.click(function() { selectImportSourceTab(importSource); });
$('.main-layout-panel')
.css("top", headerHeight + "px")
.css("bottom", "0px")
.css("height", panelHeight + "px")
.css("visibility", "visible");
importSource._ui = new importSource.ui(importSource._divBody);
$('#left-panel')
.css("left", "0px")
.css("width", leftPanelWidth + "px");
var leftPanelBodyHPaddings = 10;
// px
var leftPanelBodyVPaddings = 0;
// px
$('#left-panel-body')
.css("margin-left", leftPanelBodyHPaddings + "px")
.css("margin-top", leftPanelBodyVPaddings + "px")
.css("width", ($('#left-panel').width() - leftPanelBodyHPaddings) + "px")
.css("height", ($('#left-panel').height() - leftPanelBodyVPaddings) + "px");
$('#right-panel')
.css("left", leftPanelWidth + "px")
.css("width", (width - leftPanelWidth) + "px");
var rightPanelBodyHPaddings = 5;
// px
var rightPanelBodyVPaddings = 5;
// px
$('#right-panel-body')
.css("margin-left", rightPanelBodyHPaddings + "px")
.css("margin-top", rightPanelBodyVPaddings + "px")
.css("width", ($('#right-panel').width() - rightPanelBodyHPaddings) + "px")
.css("height", ($('#right-panel').height() - rightPanelBodyVPaddings) + "px");
};
$(window).bind("resize", resize);
window.setTimeout(resize, 50); // for Chrome, give the window some time to layout first
var renderActionArea = function(actionArea) {
actionArea.bodyElmt = $('<div>')
.addClass('action-area-tab-body')
.appendTo('#right-panel-body');
actionArea.tabElmt = $('<li>')
.addClass('action-area-tab')
.text(actionArea.label)
.appendTo($('#action-area-tabs'))
.click(function() {
Refine.selectActionArea(actionArea.id);
});
actionArea.ui = new actionArea.uiClass(actionArea.bodyElmt);
};
for (var i= 0; i < ImportSources.length; i++) {
createImportSourceTab(ImportSources[i]);
for (var i = 0; i < Refine.actionAreas.length; i++) {
renderActionArea(Refine.actionAreas[i]);
}
selectImportSourceTab(ImportSources[0]);
}
/**
 * Creates an import job on the server, submits the given form (targeted at
 * the frame named "import-iframe") to retrieve the content for that job,
 * then polls the job status once per second and shows the progress panel.
 *
 * @param importSource   value sent as the "source" query parameter
 * @param form           jQuery-wrapped form to configure and submit
 * @param progressMessage text shown in the progress panel
 */
function startImportJob(importSource, form, progressMessage) {
  var onJobCreated = function(response) {
    var jobID = response.jobID;

    form.attr("method", "post");
    form.attr("enctype", "multipart/form-data");
    form.attr("accept-charset", "UTF-8");
    form.attr("target", "import-iframe");

    var query = $.param({
      "jobID" : jobID,
      "source" : importSource
    });
    form.attr("action", "/command/core/retrieve-import-content?" + query);
    form[0].submit();

    var start = new Date();
    var timerID = window.setInterval(function() {
      pollImportJob(start, jobID, timerID);
    }, 1000);

    initializeImportProgressPanel(progressMessage, jobID, timerID);
  };

  $.post("/command/core/create-import-job", null, onJobCreated, "json");
}
/**
 * Resets the import progress panel, shows it in place of the import panel,
 * and wires the cancel button to abort the given job.
 *
 * @param progressMessage text for the progress message element
 * @param jobID           id of the import job to cancel on request
 * @param timerID         interval id of the status polling to stop on cancel
 */
function initializeImportProgressPanel(progressMessage, jobID, timerID) {
  var cancelImport = function() {
    $('#import-panel').show();
    $('#import-progress-panel').hide();

    // stop the iframe
    $('#import-iframe')[0].contentWindow.stop();

    // stop the timed polling
    window.clearInterval(timerID);

    // explicitly cancel the import job
    $.post("/command/core/cancel-import-job?" + $.param({ "jobID" : jobID }));
  };

  $('#import-progress-message').text(progressMessage);
  $('#import-progress-bar-body').css("width", "0%");
  $('#import-progress-message-left').text('Starting');
  $('#import-progress-message-center').empty();
  $('#import-progress-message-right').empty();

  $('#import-panel').hide();
  $('#import-progress-panel').show();

  $('#import-progress-cancel-button').unbind().click(cancelImport);
}
/**
 * Formats a byte count as a short label: whole megabytes from 1024 * 1024
 * bytes up, whole kilobytes from 1024 bytes up, plain bytes otherwise.
 *
 * @param b number of bytes
 * @return label such as "3 MB", "12 KB" or "512 bytes"
 */
function bytesToString(b) {
  var kilobyte = 1024;
  var megabyte = 1024 * 1024;

  if (b >= megabyte) {
    return Math.round(b / megabyte) + " MB";
  }
  if (b >= kilobyte) {
    return Math.round(b / kilobyte) + " KB";
  }
  return b + " bytes";
}
/**
 * Fetches the status of an import job once and updates the UI accordingly.
 * Errors stop the polling and show the error panel; "retrieving" updates the
 * progress display; "ready" stops the polling and navigates to /import.
 *
 * @param start   time the job was started (not read by this function)
 * @param jobID   id of the job to query
 * @param timerID interval id to clear when polling should stop
 */
function pollImportJob(start, jobID, timerID) {
  var onStatus = function(status) {
    if (status.code == "error") {
      showImportJobError(status.message);
      window.clearInterval(timerID);
      return;
    }

    if (status.state == "error") {
      showImportJobError(status.message, status.stack);
      window.clearInterval(timerID);
      return;
    }

    if (status.state == "retrieving") {
      if (status.progress < 0) {
        // No percentage available: report the amount saved so far instead.
        $('#import-progress-message-left').text(bytesToString(status.bytesSaved) + " saved");
      } else {
        $('#import-progress-bar-body').css("width", status.progress + "%");
        $('#import-progress-message-left').text(status.progress + "% saved");
      }
      return;
    }

    if (status.state == "ready") {
      window.clearInterval(timerID);

      // Just so if the user clicks Back the progress panel won't be showing if the DOM is cached.
      $('#import-progress-panel').hide();
      $('#import-panel').show();

      window.location = "/import?" + $.param({ "jobID" : jobID });
    }
  };

  $.post(
    "/command/core/get-import-job-status?" + $.param({ "jobID" : jobID }),
    null,
    onStatus,
    "json"
  );
}
/**
 * Shows the import error panel in place of the progress panel.
 *
 * @param message error text to display
 * @param stack   optional technical details; a placeholder is shown when absent
 */
function showImportJobError(message, stack) {
  var dismissError = function() {
    $('#import-error-panel').hide();
    $('#import-panel').show();
  };

  $('#import-error-message').text(message);
  $('#import-error-stack').text(stack || 'No technical details.');

  $('#import-progress-panel').hide();
  $('#import-error-panel').show();

  $('#import-error-ok-button').unbind().click(dismissError);
}
function onLoad() {
renderImportPanel();
fetchProjects();
$("#project-file-input").change(function() {
if ($("#project-name-input")[0].value.length == 0) {
var fileName = this.files[0].fileName;
if (fileName) {
$("#project-name-input")[0].value = fileName.replace(/\.\w+/, "").replace(/[_-]/g, " ");
}
$("#project-name-input").focus().select();
}
}).keypress(function(evt) {
if (evt.keyCode == 13) {
onClickUploadFileButton();
}
});
$("#upload-file-button").click(onClickUploadFileButton);
$("#more-options-link").click(function() {
$("#more-options-controls").hide();
$("#more-options").show();
});
Refine.selectActionArea('create-project');
showVersion();
}
$(onLoad);
});

View File

@ -0,0 +1,17 @@
<div id="create-project-ui-source-selection" class="relative-frame"><table id="create-project-ui-source-selection-layout">
<tr>
<td colspan="2" id="create-project-ui-source-selection-message">
<h3>Create a project by importing data. What kinds of data files can I import?</h3>
<div>TSV, CSV, *SV, Excel (.xls and .xlsx), JSON, XML, RDF as XML, and
Google Spreadsheets are all supported. Support for other formats can
be added with Refine extensions.
</div>
</td>
</tr>
<tr>
<td id="create-project-ui-source-selection-tabs">
<div>Get data from</div>
</td>
<td id="create-project-ui-source-selection-tab-bodies"></td>
</tr>
</table></div>

View File

@ -0,0 +1,145 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * UI of the "Create Project" action area.
 *
 * Stores the host element, starts with empty lists of source selection UIs,
 * custom panels and controllers, then fetches the importing configuration
 * from the server and builds the UI once it has arrived.
 *
 * @param elmt jQuery element the UI is rendered into
 */
Refine.CreateProjectUI = function(elmt) {
  var ui = this;

  this._elmt = elmt;
  this._sourceSelectionUIs = [];
  this._customPanels = [];
  this._controllers = [];

  var onConfiguration = function(response) {
    Refine.importingConfig = response.config;
    ui._initializeUI();
  };

  $.post("/command/core/get-importing-configuration", null, onConfiguration, "json");
};
// Registry of controller constructors; _initializeUI instantiates each one
// with the CreateProjectUI instance as its only argument.
Refine.CreateProjectUI.controllers = [];
/**
 * Loads the source selection markup into the host element, binds its
 * elements, and instantiates every registered controller with this UI.
 */
Refine.CreateProjectUI.prototype._initializeUI = function() {
  var markup = DOM.loadHTML("core", "scripts/index/create-project-ui-source-selection.html");
  this._sourceSelectionElmt = $(markup).appendTo(this._elmt);
  this._sourceSelectionElmts = DOM.bind(this._sourceSelectionElmt);

  var registry = Refine.CreateProjectUI.controllers;
  for (var c = 0; c < registry.length; c++) {
    var ControllerClass = registry[c];
    this._controllers.push(new ControllerClass(this));
  }
};
/**
 * Adds an import source as a tab: creates its (initially hidden) body and its
 * clickable header, lets the source's ui attach itself to the body, and
 * selects the source if it is the first one added.
 *
 * @param sourceSelectionUI descriptor with id, label and ui; this method sets
 *                          its _divBody and _divHeader
 */
Refine.CreateProjectUI.prototype.addSourceSelectionUI = function(sourceSelectionUI) {
  var ui = this;

  var tabs = $('#create-project-ui-source-selection-tabs');
  var tabBodies = $('#create-project-ui-source-selection-tab-bodies');

  var body = $('<div>').addClass('create-project-ui-source-selection-tab-body');
  body.appendTo(tabBodies).hide();
  sourceSelectionUI._divBody = body;

  var header = $('<div>')
    .addClass('create-project-ui-source-selection-tab')
    .text(sourceSelectionUI.label);
  header.appendTo(tabs).click(function() {
    ui.selectImportSource(sourceSelectionUI.id);
  });
  sourceSelectionUI._divHeader = header;

  sourceSelectionUI.ui.attachUI(sourceSelectionUI._divBody);

  this._sourceSelectionUIs.push(sourceSelectionUI);

  var isFirstSource = this._sourceSelectionUIs.length == 1;
  if (isFirstSource) {
    ui.selectImportSource(sourceSelectionUI.id);
  }
};
/**
 * Activates the tab of the first registered import source whose id equals
 * the given id: hides all tab bodies, deselects all tab headers, then shows,
 * marks and focuses the matching one. Does nothing when no source matches.
 *
 * @param id id of the import source to select
 */
Refine.CreateProjectUI.prototype.selectImportSource = function(id) {
  var sources = this._sourceSelectionUIs;

  for (var s = 0; s < sources.length; s++) {
    var candidate = sources[s];
    if (candidate.id != id) {
      continue;
    }

    $('.create-project-ui-source-selection-tab-body').hide();
    $('.create-project-ui-source-selection-tab').removeClass('selected');

    candidate._divBody.show();
    candidate._divHeader.addClass('selected');
    candidate.ui.focus();
    return;
  }
};
/**
 * Appends a new custom panel to the host element and registers it.
 *
 * @return the inner 'relative-frame' div for the caller to fill; the outer
 *         'create-project-ui-panel' div is what gets tracked in _customPanels
 */
Refine.CreateProjectUI.prototype.addCustomPanel = function() {
  var outer = $('<div>').addClass('create-project-ui-panel');
  outer.appendTo(this._elmt);

  var inner = $('<div>').addClass('relative-frame');
  inner.appendTo(outer);

  this._customPanels.push(outer);

  return inner;
};
/**
 * Makes the custom panel that contains the given inner div visible and hides
 * all other custom panels as well as the source selection panel. Does nothing
 * when the div's parent is not a registered custom panel.
 *
 * @param div inner div as returned by addCustomPanel()
 */
Refine.CreateProjectUI.prototype.showCustomPanel = function(div) {
  var wrapper = div.parent();
  var panels = this._customPanels;

  for (var p = 0; p < panels.length; p++) {
    var candidate = panels[p];
    if (candidate[0] !== wrapper[0]) {
      continue;
    }

    $('.create-project-ui-panel').css('visibility', 'hidden');
    this._sourceSelectionElmt.css('visibility', 'hidden');
    candidate.css('visibility', 'visible');
    return;
  }
};
/**
 * Hides every custom panel and makes the source selection panel visible.
 */
Refine.CreateProjectUI.prototype.showSourceSelectionPanel = function() {
  var customPanels = $('.create-project-ui-panel');
  customPanels.css('visibility', 'hidden');

  var sourceSelection = this._sourceSelectionElmt;
  sourceSelection.css('visibility', 'visible');
};
// Register the "Create Project" action area with CreateProjectUI as its UI class.
Refine.actionAreas.push({
id: "create-project",
label: "Create Project",
uiClass: Refine.CreateProjectUI
});

View File

@ -0,0 +1,395 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * Default importing controller for the "Create Project" UI.
 *
 * Reserves four custom panels (progress, error, file selection, parsing),
 * fills the first two from their HTML templates, and registers every source
 * listed in Refine.DefaultImportingController.sources with the UI.
 *
 * @param createProjectUI the Refine.CreateProjectUI instance hosting this controller
 */
Refine.DefaultImportingController = function(createProjectUI) {
  this._createProjectUI = createProjectUI;

  var progressPanel = createProjectUI.addCustomPanel();
  this._progressPanel = progressPanel;
  progressPanel.html(DOM.loadHTML("core", "scripts/index/default-importing-controller/progress-panel.html"));

  var errorPanel = createProjectUI.addCustomPanel();
  this._errorPanel = errorPanel;
  errorPanel.html(DOM.loadHTML("core", "scripts/index/default-importing-controller/error-panel.html"));

  this._fileSelectionPanel = createProjectUI.addCustomPanel();
  this._parsingPanel = createProjectUI.addCustomPanel();

  var sources = Refine.DefaultImportingController.sources;
  for (var s = 0; s < sources.length; s++) {
    var source = sources[s];
    // Each source gets its own UI object bound to this controller.
    source.ui = new source.uiClass(this);
    createProjectUI.addSourceSelectionUI(source);
  }
};
// Make this controller available to every CreateProjectUI instance.
Refine.CreateProjectUI.controllers.push(Refine.DefaultImportingController);
// Source descriptors; the constructor instantiates each one's uiClass.
Refine.DefaultImportingController.sources = [];
// Parser UI registry (populated outside this file section).
Refine.DefaultImportingController.parserUIs = {};
/**
 * Abandons the current import: tears down the wizard panels, forgets all
 * per-job state and returns to the source selection panel.
 */
Refine.DefaultImportingController.prototype._startOver = function() {
  this._disposeFileSelectionPanel();

  // The parsing panel needs tearing down as well (its element map is dropped
  // just below). Its dispose routine is defined outside this section of the
  // file, so only call it when it is actually available.
  if (typeof this._disposeParsingPanel === "function") {
    this._disposeParsingPanel();
  }

  delete this._fileSelectionPanelElmts;
  delete this._parsingPanelElmts;

  delete this._jobID;
  delete this._job;
  delete this._extensions;

  delete this._format;
  delete this._parserOptions;
  delete this._projectName;

  this._createProjectUI.showSourceSelectionPanel();
};
/**
 * Creates an importing job on the server, submits the given form to load the
 * raw data for it, and polls the job once per second until its config reports
 * hasData. The progress panel is shown meanwhile, with a cancel handler.
 *
 * @param form            jQuery-wrapped form to configure and submit
 * @param progressMessage text shown in the progress panel
 * @param callback        optional; invoked as callback(jobID, job) after the
 *                        job is ready and _onImportJobReady() has run
 */
Refine.DefaultImportingController.prototype.startImportJob = function(form, progressMessage, callback) {
var self = this;
$.post(
"/command/core/create-importing-job",
null,
function(data) {
// Remember the job id on the controller and keep a local copy for the closures below.
var jobID = self._jobID = data.jobID;
// Point the form at the load-raw-data sub-command, targeting the frame
// named "default-importing-iframe".
form.attr("method", "post")
.attr("enctype", "multipart/form-data")
.attr("accept-charset", "UTF-8")
.attr("target", "default-importing-iframe")
.attr("action", "/command/core/importing-controller?" + $.param({
"controller": "core/default-importing-controller",
"jobID": jobID,
"subCommand": "load-raw-data"
}));
form[0].submit();
var start = new Date();
// Poll every second; _pollImportJob clears the interval itself when done or on error.
var timerID = window.setInterval(
function() {
self._pollImportJob(
start, jobID, timerID,
// done-check: the job has data
function(job) {
return job.config.hasData;
},
// on done: store the job, advance the wizard, then notify the caller
function(jobID, job) {
self._job = job;
self._onImportJobReady();
if (callback) {
callback(jobID, job);
}
}
);
},
1000
);
self._initializeImportProgressPanel(progressMessage, function() {
// stop the iframe
$('#default-importing-iframe')[0].contentWindow.stop();
// stop the timed polling
window.clearInterval(timerID);
// explicitly cancel the import job
$.post("/command/core/cancel-importing-job?" + $.param({ "jobID": jobID }));
self._createProjectUI.showSourceSelectionPanel();
});
},
"json"
);
};
/**
 * Shows the progress panel in its initial state.
 *
 * @param progressMessage text for the main progress message
 * @param onCancel        click handler installed on the cancel button
 *                        (replacing any previously bound handlers)
 */
Refine.DefaultImportingController.prototype._initializeImportProgressPanel = function(progressMessage, onCancel) {
  this._createProjectUI.showCustomPanel(this._progressPanel);

  // Reset every part of the progress display.
  $('#default-importing-progress-message').text(progressMessage);
  $('#default-importing-progress-bar-body').css("width", "0%");
  $('#default-importing-progress-message-left').text('Starting');
  $('#default-importing-progress-message-center').empty();
  $('#default-importing-progress-message-right').empty();
  $('#default-importing-progress-timing').empty();

  var cancelButton = $('#default-importing-progress-cancel-button');
  cancelButton.unbind().click(onCancel);
};
/**
 * Fetches the status of an importing job once and updates the progress panel.
 *
 * @param start     Date the operation started; used to estimate remaining time
 * @param jobID     id of the job to query
 * @param timerID   interval id to clear on error or completion
 * @param checkDone function(job) returning truthy when polling should stop
 * @param callback  optional; invoked as callback(jobID, job) once checkDone passes
 */
Refine.DefaultImportingController.prototype._pollImportJob = function(start, jobID, timerID, checkDone, callback) {
var self = this;
$.post(
"/command/core/get-importing-job-status?" + $.param({ "jobID": jobID }),
null,
function(data) {
// No response body, an explicit error code, or a response without a job
// all end the polling and show the error panel.
if (!(data)) {
self._showImportJobError("Unknown error");
window.clearInterval(timerID);
return;
} else if (data.code == "error" || !("job" in data)) {
self._showImportJobError(data.message || "Unknown error");
window.clearInterval(timerID);
return;
}
var job = data.job;
if (checkDone(job)) {
$('#default-importing-progress-message').text('Done.');
window.clearInterval(timerID);
if (callback) {
callback(jobID, job);
}
} else {
var progress = job.config.progress;
if (progress.percent > 0) {
// Linear extrapolation: total time = elapsed * (100 / percent).
var secondsSpent = (new Date().getTime() - start.getTime()) / 1000;
var secondsRemaining = (100 / progress.percent) * secondsSpent - secondsSpent;
$('#default-importing-progress-bar-body')
.removeClass('indefinite')
.css("width", progress.percent + "%");
if (secondsRemaining > 1) {
if (secondsRemaining > 60) {
$('#default-importing-progress-timing').text(
Math.ceil(secondsRemaining / 60) + " minutes remaining");
} else {
$('#default-importing-progress-timing').text(
Math.ceil(secondsRemaining) + " seconds remaining");
}
} else {
$('#default-importing-progress-timing').text('almost done ...');
}
} else {
// No percentage yet: mark the bar 'indefinite' and show no time estimate.
$('#default-importing-progress-bar-body').addClass('indefinite');
$('#default-importing-progress-timing').empty();
}
$('#default-importing-progress-message').text(progress.message);
}
},
"json"
);
};
/**
 * Shows the error panel with the given message; its OK button returns to the
 * source selection panel.
 *
 * @param message error text to display
 * @param stack   optional technical details; a placeholder is shown when absent
 */
Refine.DefaultImportingController.prototype._showImportJobError = function(message, stack) {
  var controller = this;
  var backToSourceSelection = function() {
    controller._createProjectUI.showSourceSelectionPanel();
  };

  $('#default-importing-error-message').text(message);
  $('#default-importing-error-stack').text(stack || 'No technical details.');

  this._createProjectUI.showCustomPanel(this._errorPanel);

  $('#default-importing-error-ok-button').unbind().click(backToSourceSelection);
};
/**
 * Advances the wizard once the job has data: prepares per-file bookkeeping,
 * then shows the file selection panel when more than one file was retrieved,
 * or goes straight to the parsing panel otherwise.
 */
Refine.DefaultImportingController.prototype._onImportJobReady = function() {
  this._prepareData();

  var retrievedFiles = this._job.config.retrievalRecord.files;
  if (retrievedFiles.length > 1) {
    this._showFileSelectionPanel();
    return;
  }

  this._showParsingPanel(false);
};
/**
 * Derives UI bookkeeping from the job's retrieval record:
 * marks each file as selected or not according to config.fileSelection, and
 * stores in this._extensions a list of { extension, count } records sorted by
 * descending count.
 */
Refine.DefaultImportingController.prototype._prepareData = function() {
  var countsByExtension = {};
  var extensionRecords = [];

  var files = this._job.config.retrievalRecord.files;
  var selectedIndices = this._job.config.fileSelection;

  for (var f = 0; f < files.length; f++) {
    var file = files[f];
    file.selected = false;

    var name = file.fileName;
    var lastSlash = name.lastIndexOf('/');
    var lastDot = name.lastIndexOf('.');

    // Only a dot that comes after at least one character of the last path
    // segment counts as the start of an extension.
    if (lastDot <= lastSlash + 1) {
      continue;
    }

    var extension = name.substring(lastDot);
    if (extension in countsByExtension) {
      countsByExtension[extension].count++;
    } else {
      var record = { extension: extension, count: 1 };
      countsByExtension[extension] = record;
      extensionRecords.push(record);
    }
  }

  for (var s = 0; s < selectedIndices.length; s++) {
    var selectedFile = files[selectedIndices[s]];
    selectedFile.selected = true;
  }

  extensionRecords.sort(function(left, right) {
    return right.count - left.count;
  });

  this._extensions = extensionRecords;
};
/**
 * Makes sure this._parserOptions holds initialization data for the given
 * format, fetching it from the server (behind a busy indicator) if needed.
 *
 * @param format format identifier
 * @param onDone invoked once the options are available; NOT invoked when the
 *               server response carries no options
 */
Refine.DefaultImportingController.prototype._ensureFormatParserUIHasInitializationData = function(format, onDone) {
  if (format in this._parserOptions) {
    onDone();
    return;
  }

  var controller = this;
  var dismissBusy = DialogSystem.showBusy("Inspecting selected files ...");

  var query = $.param({
    "controller": "core/default-importing-controller",
    "jobID": this._jobID,
    "subCommand": "initialize-parser-ui",
    "format": format
  });

  var onInitialized = function(response) {
    dismissBusy();

    if (response.options) {
      controller._parserOptions[format] = response.options;
      onDone();
    }
  };

  $.post("/command/core/importing-controller?" + query, null, onInitialized, "json");
};
/**
 * Sends the current format and the given parser options to the server.
 *
 * @param options  parser options; sent JSON-serialized
 * @param callback passed to $.post as the success handler
 */
Refine.DefaultImportingController.prototype.updateFormatAndOptions = function(options, callback) {
  var query = $.param({
    "controller": "core/default-importing-controller",
    "jobID": this._jobID,
    "subCommand": "update-format-and-options"
  });

  var payload = {
    "format" : this._format,
    "options" : JSON.stringify(options)
  };

  $.post("/command/core/importing-controller?" + query, payload, callback, "json");
};
/**
 * Fetches preview data for the current importing job: first the models, then
 * the first rows. The callback receives one object holding every own property
 * of the get-models response plus a rowModel property with the get-rows
 * response.
 *
 * @param callback function(result) invoked after both requests succeeded
 * @param numRows  optional row limit; defaults to 100 when falsy
 */
Refine.DefaultImportingController.prototype.getPreviewData = function(callback, numRows) {
var self = this;
var result = {};
$.post(
"/command/core/get-models?" + $.param({ "importingJobID" : this._jobID }),
null,
function(data) {
// Copy the models response into the result object.
for (var n in data) {
if (data.hasOwnProperty(n)) {
result[n] = data[n];
}
}
// Only request the rows after the models have arrived.
$.post(
"/command/core/get-rows?" + $.param({
"importingJobID" : self._jobID,
"start" : 0,
"limit" : numRows || 100 // More than we parse for preview anyway
}),
null,
function(data) {
// Un-pool objects
// (a cell's "r" is a key into data.pool.recons; replace it with the pooled object)
for (var r = 0; r < data.rows.length; r++) {
var row = data.rows[r];
for (var c = 0; c < row.cells.length; c++) {
var cell = row.cells[c];
if ((cell) && ("r" in cell)) {
cell.r = data.pool.recons[cell.r];
}
}
}
result.rowModel = data;
callback(result);
},
// NOTE(review): this request asks for "jsonp" while the outer one uses "json" — confirm this is intended.
"jsonp"
);
},
"json"
);
};
/**
 * Creates the project from the current importing job.
 *
 * Does nothing unless a format parser UI exists and confirms it is ready.
 * Requires a non-blank project name; otherwise alerts and focuses the name
 * input. On success of the create-project request, shows the progress panel
 * and polls the job once per second until its config carries a projectID,
 * then navigates to that project.
 */
Refine.DefaultImportingController.prototype._createProject = function() {
  if ((this._formatParserUI) && this._formatParserUI.confirmReadyToCreateProject()) {
    var projectNameInput = this._parsingPanelElmts.projectNameInput;
    var projectName = $.trim(projectNameInput[0].value);
    if (projectName.length == 0) {
      window.alert("Please name the project.");
      // Put the cursor into the name field so the user can fix it right away.
      projectNameInput.focus();
      return;
    }

    var self = this;
    // Captured now so the cancel handler below can still name the job even if
    // the controller's state has been reset in the meantime.
    var jobID = this._jobID;

    var options = this._formatParserUI.getOptions();
    options.projectName = projectName;

    $.post(
      "/command/core/importing-controller?" + $.param({
        "controller": "core/default-importing-controller",
        "jobID": this._jobID,
        "subCommand": "create-project"
      }),
      {
        "format" : this._format,
        "options" : JSON.stringify(options)
      },
      function() {
        var start = new Date();
        var timerID = window.setInterval(
          function() {
            self._pollImportJob(
              start,
              self._jobID,
              timerID,
              // done-check: the server has assigned a project id
              function(job) {
                return "projectID" in job.config;
              },
              function(jobID, job) {
                document.location = "project?project=" + job.config.projectID;
              }
            );
          },
          1000
        );

        self._initializeImportProgressPanel("Creating project ...", function() {
          // stop the timed polling
          window.clearInterval(timerID);

          // explicitly cancel the import job
          $.post("/command/core/cancel-importing-job?" + $.param({ "jobID": jobID }));

          self._createProjectUI.showSourceSelectionPanel();
        });
      },
      "json"
    );
  }
};

View File

@ -0,0 +1,5 @@
<div id="default-importing-error-panel"><div class="grid-layout layout-normal layout-full"><table>
<tr><td id="default-importing-error-message"></td></tr>
<tr><td id="default-importing-error-stack"></td></tr>
<tr><td><button class="button button-primary" id="default-importing-error-ok-button">OK</button></td></tr>
</table></div></div>

View File

@ -0,0 +1,36 @@
<div bind="wizardHeader" class="default-importing-wizard-header"><div class="grid-layout layout-tightest layout-full"><table><tr>
<td width="1%"><button bind="startOverButton" class="button">&laquo; Start Over</button></td>
<td width="98%">Select Files to Import</td>
<td width="1%"><button bind="nextButton" class="button button-primary">Configure Parsing Options &raquo;</button></td>
</tr></table></div></div>
<div bind="controlPanel" class="default-importing-file-selection-control-panel">
<div class="grid-layout layout-full layout-tighter"><table>
<tr>
<td colspan="3">
There are several files available to import.
Please select the desired ones.
</td>
</tr>
<tr>
<td width="98%" bind="summary"></td>
<td width="1%"><button bind="selectAllButton" class="button">Select All</button></td>
<td width="1%"><button bind="unselectAllButton" class="button">Unselect All</button></td>
</tr>
</table></div>
<h2>Select by Extension</h2>
<div bind="extensionContainer" class="grid-layout layout-full layout-tightest"></div>
<h2>Select by Regex on File Names</h2>
<div class="grid-layout layout-full layout-tighter"><table>
<tr><td colspan="3"><input bind="regexInput" style="width: 100%;"/></td></tr>
<tr>
<td bind="regexSummary"></td>
<td width="1%"><button bind="selectRegexButton" class="button">Select</button></td>
<td width="1%"><button bind="unselectRegexButton" class="button">Unselect</button></td>
</tr>
</table></div>
</div>
<div bind="filePanel" class="default-importing-file-selection-file-panel"></div>

View File

@ -0,0 +1,314 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * Builds, renders and shows the file selection panel; its "next" button
 * commits the file selection.
 */
Refine.DefaultImportingController.prototype._showFileSelectionPanel = function() {
  var controller = this;
  var commitSelection = function() {
    controller._commitFileSelection();
  };

  this._prepareFileSelectionPanel();

  var elmts = this._fileSelectionPanelElmts;
  elmts.nextButton.click(commitSelection);

  this._renderFileSelectionPanel();
  this._createProjectUI.showCustomPanel(this._fileSelectionPanel);
};
/**
 * Tears down the file selection panel: detaches its window resize handler
 * (if one was installed), removes its event handlers and empties it.
 */
Refine.DefaultImportingController.prototype._disposeFileSelectionPanel = function() {
  var resizer = this._fileSelectionPanelResizer;
  if (resizer) {
    $(window).unbind("resize", resizer);
  }

  var panel = this._fileSelectionPanel;
  panel.unbind();
  panel.empty();
};
/**
 * (Re)builds the file selection panel from its HTML template, binds its
 * elements, wires the "start over" button, and installs a window resize
 * handler that lays out the control panel (fixed 350px wide, on the left)
 * and the file panel (remaining width) below the wizard header.
 */
Refine.DefaultImportingController.prototype._prepareFileSelectionPanel = function() {
  var self = this;

  // A resize handler from an earlier preparation would otherwise stay bound
  // to the window, so every re-preparation would leak one more handler.
  if (this._fileSelectionPanelResizer) {
    $(window).unbind("resize", this._fileSelectionPanelResizer);
  }

  this._fileSelectionPanel.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/default-importing-controller/file-selection-panel.html"));

  this._fileSelectionPanelElmts = DOM.bind(this._fileSelectionPanel);
  this._fileSelectionPanelElmts.startOverButton.click(function() {
    self._startOver();
  });

  this._fileSelectionPanelResizer = function() {
    var elmts = self._fileSelectionPanelElmts;
    var width = self._fileSelectionPanel.width();
    var height = self._fileSelectionPanel.height();
    var headerHeight = elmts.wizardHeader.outerHeight(true);
    var controlPanelWidth = 350;

    // Paddings are subtracted so the boxes including padding fit the layout.
    elmts.controlPanel
      .css("left", "0px")
      .css("top", headerHeight + "px")
      .css("width", (controlPanelWidth - DOM.getHPaddings(elmts.controlPanel)) + "px")
      .css("height", (height - headerHeight - DOM.getVPaddings(elmts.controlPanel)) + "px");

    elmts.filePanel
      .css("left", controlPanelWidth + "px")
      .css("top", headerHeight + "px")
      .css("width", (width - controlPanelWidth - DOM.getHPaddings(elmts.filePanel)) + "px")
      .css("height", (height - headerHeight - DOM.getVPaddings(elmts.filePanel)) + "px");
  };

  $(window).resize(this._fileSelectionPanelResizer);
  // Lay out once immediately instead of waiting for the first resize event.
  this._fileSelectionPanelResizer();
};
/**
 * Renders both halves of the file selection panel: the file table first,
 * then the control panel.
 */
Refine.DefaultImportingController.prototype._renderFileSelectionPanel = function() {
  var controller = this;

  controller._renderFileSelectionPanelFileTable();
  controller._renderFileSelectionPanelControlPanel();
};
/**
 * Rebuilds the table of retrieved files: one row per file with a selection
 * checkbox, name, mime type, format and size. Toggling a checkbox updates
 * the file's selected flag and refreshes the selection summary.
 */
Refine.DefaultImportingController.prototype._renderFileSelectionPanelFileTable = function() {
  var controller = this;

  this._fileSelectionPanelElmts.filePanel.empty();

  var headerMarkup = '<table><tr><th></th><th>Name</th><th>Mime-type</th><th>Format</th><th>Size</th></tr></table>';
  var fileTable = $(headerMarkup).appendTo(this._fileSelectionPanelElmts.filePanel)[0];

  var files = this._job.config.retrievalRecord.files;

  var appendFileRow = function(fileRecord, index) {
    var row = fileTable.insertRow(fileTable.rows.length);
    var parity = (index % 2 == 0) ? 'even' : 'odd';
    $(row).addClass(parity);

    var selectCell = $('<td>').appendTo(row);
    var checkbox = $('<input>')
      .attr("type", "checkbox")
      .attr("index", index)
      .appendTo(selectCell)
      .click(function() {
        files[index].selected = this.checked;
        controller._updateFileSelectionSummary();
      });
    if (fileRecord.selected) {
      checkbox.attr("checked", "checked");
    }

    var mimeType = fileRecord.declaredMimeType || fileRecord.mimeType || "unknown";
    var format = fileRecord.format || "unknown";

    $('<td>').text(fileRecord.fileName).addClass("default-importing-file-selection-filename").appendTo(row);
    $('<td>').text(mimeType).appendTo(row);
    $('<td>').text(format).appendTo(row);
    $('<td>').text(fileRecord.size + " bytes").appendTo(row);
  };

  for (var f = 0; f < files.length; f++) {
    appendFileRow(files[f], f);
  }
};
/*
 * Wires up the control panel of the file selection step: the select-all /
 * unselect-all buttons, one row of Select / Unselect buttons per entry of
 * this._extensions, and the regular-expression input with its two buttons.
 * Every action updates both the "selected" flags of the file records and the
 * matching checkboxes in the file panel, then refreshes the summary.
 */
Refine.DefaultImportingController.prototype._renderFileSelectionPanelControlPanel = function() {
  var self = this;
  var files = this._job.config.retrievalRecord.files;

  // Checks or unchecks the given set of checkbox elements.
  var applyChecked = function(checkboxes, selected) {
    if (selected) {
      checkboxes.attr("checked", "checked");
    } else {
      checkboxes.removeAttr("checked");
    }
  };

  // Puts every file into the given selection state.
  var markAll = function(selected) {
    for (var i = 0; i < files.length; i++) {
      files[i].selected = selected;
    }
    applyChecked(self._fileSelectionPanelElmts.filePanel.find("input"), selected);
    self._updateFileSelectionSummary();
  };

  // Switches to the given selection state every file that is not yet in
  // that state and is accepted by the matches(file) predicate.
  var markMatching = function(matches, selected) {
    for (var i = 0; i < files.length; i++) {
      var file = files[i];
      var needsChange = selected ? !file.selected : file.selected;
      if (needsChange) {
        if (matches(file)) {
          file.selected = selected;
          applyChecked(
            self._fileSelectionPanelElmts.filePanel.find("input[index='" + i + "']"),
            selected
          );
        }
      }
    }
    self._updateFileSelectionSummary();
  };

  // Removes the regex-match highlighting from all file name cells and
  // returns those cells.
  var clearHighlights = function() {
    return self._fileSelectionPanelElmts.filePanel
      .find(".default-importing-file-selection-filename")
      .removeClass("highlighted");
  };

  // Selects or unselects the files whose names match the regex currently
  // typed into the regex input; an invalid regex is silently ignored.
  var markByRegex = function(selected) {
    clearHighlights();
    try {
      var regex = new RegExp(self._fileSelectionPanelElmts.regexInput[0].value);
      markMatching(function(file) {
        return regex.test(file.fileName);
      }, selected);
    } catch (e) {
      // Ignore
    }
  };

  this._fileSelectionPanelElmts.extensionContainer.empty();

  this._fileSelectionPanelElmts.selectAllButton.unbind().click(function(evt) {
    markAll(true);
  });
  this._fileSelectionPanelElmts.unselectAllButton.unbind().click(function(evt) {
    markAll(false);
  });

  var table = $('<table></table>')
    .appendTo(this._fileSelectionPanelElmts.extensionContainer)[0];

  // Adds one button cell to an extension row.
  var addExtensionButton = function(row, label, extension, selected) {
    $('<button>')
      .text(label)
      .addClass("button")
      .appendTo($('<td>').appendTo(row))
      .click(function() {
        markMatching(function(file) {
          return file.fileName.endsWith(extension.extension);
        }, selected);
      });
  };

  // Adds the row for one file extension: its name, its file count, and
  // buttons to select / unselect all files ending with it.
  var addExtensionRow = function(extension) {
    var row = table.insertRow(table.rows.length);
    $('<td>').text(extension.extension).appendTo(row);
    $('<td>').text(extension.count + (extension.count > 1 ? " files" : " file")).appendTo(row);
    addExtensionButton(row, "Select", extension, true);
    addExtensionButton(row, "Unselect", extension, false);
  };

  for (var e = 0; e < this._extensions.length; e++) {
    addExtensionRow(this._extensions[e]);
  }

  this._updateFileSelectionSummary();

  // Live feedback while typing: highlight the file names matching the regex
  // and report how many matched. An invalid regex leaves the count at 0.
  this._fileSelectionPanelElmts.regexInput.unbind().keyup(function() {
    var matchCount = 0;
    var nameCells = clearHighlights();
    try {
      var regex = new RegExp(this.value);
      nameCells.each(function() {
        if (regex.test($(this).text())) {
          $(this).addClass("highlighted");
          matchCount++;
        }
      });
    } catch (e) {
      // Ignore
    }
    self._fileSelectionPanelElmts.regexSummary.text(matchCount + (matchCount == 1 ? " match" : " matches"));
  });

  this._fileSelectionPanelElmts.selectRegexButton.unbind().click(function() {
    markByRegex(true);
  });
  this._fileSelectionPanelElmts.unselectRegexButton.unbind().click(function() {
    markByRegex(false);
  });
};
/*
 * Recomputes this._job.config.fileSelection as the list of indices of the
 * file records flagged as selected, and shows "N of M files selected" in the
 * summary element.
 */
Refine.DefaultImportingController.prototype._updateFileSelectionSummary = function() {
  var files = this._job.config.retrievalRecord.files;

  var selectedIndices = [];
  for (var index = 0; index < files.length; index++) {
    if (files[index].selected) {
      selectedIndices.push(index);
    }
  }
  this._job.config.fileSelection = selectedIndices;

  var message = selectedIndices.length + " of " + files.length + " files selected";
  this._fileSelectionPanelElmts.summary.text(message);
};
/*
 * Sends the current file selection to the importing controller
 * ("update-file-selection" sub-command) and, on success, replaces the job
 * with the one returned by the server and moves on to the parsing panel.
 *
 * Alerts and does nothing else if no file is selected. While the request is
 * in flight a busy indicator is shown; it is dismissed as soon as the
 * response arrives, whether the response is a success or an error.
 */
Refine.DefaultImportingController.prototype._commitFileSelection = function() {
  if (this._job.config.fileSelection.length == 0) {
    alert("Please select at least one file.");
    return;
  }

  var self = this;
  var dismissBusy = DialogSystem.showBusy("Inspecting selected files ...");
  $.post(
    "/command/core/importing-controller?" + $.param({
      "controller": "core/default-importing-controller",
      "jobID": this._jobID,
      "subCommand": "update-file-selection"
    }),
    {
      "fileSelection" : JSON.stringify(this._job.config.fileSelection)
    },
    function(data) {
      // Always take the busy indicator down first so that an error response
      // does not leave it covering the UI.
      dismissBusy();

      if (!(data)) {
        self._showImportJobError("Unknown error");
        return;
      }
      if (data.code == "error" || !("job" in data)) {
        self._showImportJobError(data.message || "Unknown error");
        return;
      }

      // Different files might be selected. We start over again.
      // Use self here: inside this callback "this" is not the controller.
      delete self._parserOptions;

      self._job = data.job;
      self._showParsingPanel(true);
    },
    "json"
  );
};

View File

@ -0,0 +1,21 @@
<div bind="wizardHeader" class="default-importing-wizard-header"><div class="grid-layout layout-tightest layout-full"><table><tr>
<td width="1%"><button bind="startOverButton" class="button">&laquo; Start Over</button></td>
<td width="1%"><button bind="previousButton" class="button">&laquo; Re-select Files</button></td>
<td width="98%">Configure Parsing Options</td>
<td style="text-align: right;">Project&nbsp;name</td>
<td width="1%"><input class="inline" type="text" size="30" bind="projectNameInput" /></td>
<td width="1%"><button bind="nextButton" class="button button-primary">Create Project &raquo;</button></td>
</tr></table></div></div>
<div bind="dataPanel" class="default-importing-parsing-data-panel"></div>
<div bind="progressPanel" class="default-importing-progress-data-panel">
<img src="images/large-spinner.gif" /> Updating preview ...
</div>
<div bind="controlPanel" class="default-importing-parsing-control-panel"><table><tr>
<td class="default-importing-parsing-control-panel-formats">
<div class="default-importing-parsing-control-panel-formats-message">Parse data as</div>
<div bind="formatsContainer"></div>
</td>
<td bind="optionsContainer" class="default-importing-parsing-control-panel-options-panel"></td>
</tr></table></div>

View File

@ -0,0 +1,186 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Builds and shows the parsing panel.
 *
 * Defaults the format to the job's top-ranked format and the parser options
 * to an empty object when they are not set yet, hooks up the wizard buttons,
 * and pre-fills the project name input. When no project name is set, one is
 * derived from the first selected file's name with non-word characters
 * turned into spaces.
 *
 * hasFileSelection: when truthy the "previous" button leads back to the file
 * selection panel; otherwise that button is hidden.
 */
Refine.DefaultImportingController.prototype._showParsingPanel = function(hasFileSelection) {
  var self = this;

  this._format = this._format || this._job.config.rankedFormats[0];
  this._parserOptions = this._parserOptions || {};

  this._prepareParsingPanel();

  var elmts = this._parsingPanelElmts;
  elmts.nextButton.click(function() {
    self._createProject();
  });

  if (!hasFileSelection) {
    elmts.previousButton.hide();
  } else {
    elmts.previousButton.click(function() {
      self._createProjectUI.showCustomPanel(self._fileSelectionPanel);
    });
  }

  if (!(this._projectName) && this._job.config.fileSelection.length > 0) {
    var config = this._job.config;
    var firstSelected = config.retrievalRecord.files[config.fileSelection[0]];
    var spacedName = firstSelected.fileName.replace(/\W/g, ' ').replace(/\s+/g, ' ');
    this._projectName = $.trim(spacedName);
  }
  if (this._projectName) {
    elmts.projectNameInput[0].value = this._projectName;
  }

  this._createProjectUI.showCustomPanel(this._parsingPanel);
};
/*
 * Tears down the parsing panel: disposes the current parser UI, unbinds the
 * parsing panel's window resize handler if one was installed, empties the
 * panel element and drops the bound element map.
 *
 * NOTE(review): despite its name, this body only touches parsing-panel state
 * (_parsingPanel, _parsingPanelResizer, _parsingPanelElmts). It looks like it
 * was meant to be called _disposeParsingPanel, and it may overwrite a
 * same-named method defined for the file selection panel -- confirm against
 * the callers before renaming.
 */
Refine.DefaultImportingController.prototype._disposeFileSelectionPanel = function() {
  this._disposeParserUI();

  var resizer = this._parsingPanelResizer;
  if (resizer) {
    $(window).unbind("resize", resizer);
  }

  this._parsingPanel.unbind().empty();
  delete this._parsingPanelElmts;
};
/*
 * (Re)builds the parsing panel from its HTML template: binds the elements,
 * wires the "start over" button, installs a window resize handler that lays
 * out the data, progress and control panels, creates one tab per ranked
 * format of the job, and finally selects the current format.
 */
Refine.DefaultImportingController.prototype._prepareParsingPanel = function() {
  var self = this;

  // This method runs every time the parsing panel is shown. Drop the resize
  // handler installed by a previous run so handlers do not pile up on window.
  if (this._parsingPanelResizer) {
    $(window).unbind("resize", this._parsingPanelResizer);
  }

  this._parsingPanel.unbind().empty().html(
    DOM.loadHTML("core", "scripts/index/default-importing-controller/parsing-panel.html"));

  this._parsingPanelElmts = DOM.bind(this._parsingPanel);
  this._parsingPanelElmts.startOverButton.click(function() {
    self._startOver();
  });
  this._parsingPanelElmts.progressPanel.hide();

  // Layout: header on top, data/progress panel in the middle (both occupy
  // the same area), fixed-height control panel at the bottom.
  this._parsingPanelResizer = function() {
    var elmts = self._parsingPanelElmts;
    var width = self._parsingPanel.width();
    var height = self._parsingPanel.height();
    var headerHeight = elmts.wizardHeader.outerHeight(true);
    var controlPanelHeight = 300;

    elmts.dataPanel
      .css("left", "0px")
      .css("top", headerHeight + "px")
      .css("width", (width - DOM.getHPaddings(elmts.dataPanel)) + "px")
      .css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.dataPanel)) + "px");
    elmts.progressPanel
      .css("left", "0px")
      .css("top", headerHeight + "px")
      .css("width", (width - DOM.getHPaddings(elmts.progressPanel)) + "px")
      .css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.progressPanel)) + "px");
    elmts.controlPanel
      .css("left", "0px")
      .css("top", (height - controlPanelHeight) + "px")
      .css("width", (width - DOM.getHPaddings(elmts.controlPanel)) + "px")
      .css("height", (controlPanelHeight - DOM.getVPaddings(elmts.controlPanel)) + "px");
  };
  $(window).resize(this._parsingPanelResizer);
  this._parsingPanelResizer();

  var formats = this._job.config.rankedFormats;
  var createFormatTab = function(format) {
    var tab = $('<div>')
      .text(Refine.importingConfig.formats[format].label)
      .attr("format", format)
      .addClass("default-importing-parsing-control-panel-format")
      .appendTo(self._parsingPanelElmts.formatsContainer)
      .click(function() {
        self._selectFormat(format);
      });

    if (format == self._format) {
      tab.addClass("selected");
    }
  };
  for (var i = 0; i < formats.length; i++) {
    createFormatTab(formats[i]);
  }

  this._selectFormat(this._format);
};
/*
 * Disposes the current format parser UI, if any, and clears the containers
 * it rendered into (options and data panels are unbound and emptied, the
 * progress panel is only unbound).
 */
Refine.DefaultImportingController.prototype._disposeParserUI = function() {
  var parserUI = this._formatParserUI;
  if (parserUI) {
    parserUI.dispose();
    delete this._formatParserUI;
  }

  var elmts = this._parsingPanelElmts;
  if (elmts) {
    elmts.optionsContainer.unbind().empty();
    elmts.progressPanel.unbind();
    elmts.dataPanel.unbind().empty();
  }
};
/*
 * Switches the parsing panel to the given format.
 *
 * Does nothing when that format is already current and has a parser UI, or
 * when no parser UI class is registered for the format. Otherwise, once
 * _ensureFormatParserUIHasInitializationData invokes the callback, the old
 * parser UI is disposed, the matching format tab is marked selected, and a
 * new parser UI is constructed for the format.
 */
Refine.DefaultImportingController.prototype._selectFormat = function(newFormat) {
  var alreadyShown = (newFormat == this._format) && (this._formatParserUI);
  if (alreadyShown) {
    // The new format is the same as the existing one.
    return;
  }

  var uiClassName = Refine.importingConfig.formats[newFormat].uiClass;
  var uiClass = Refine.DefaultImportingController.parserUIs[uiClassName];
  if (!uiClass) {
    return;
  }

  var self = this;
  var installParserUI = function() {
    self._disposeParserUI();

    // Move the "selected" marker to the tab of the new format.
    self._parsingPanelElmts.formatsContainer
      .find(".default-importing-parsing-control-panel-format")
      .removeClass("selected")
      .filter(function() {
        return this.getAttribute("format") == newFormat;
      })
      .addClass("selected");

    self._format = newFormat;
    self._formatParserUI = new uiClass(
      self,
      self._jobID,
      self._job,
      self._format,
      self._parserOptions[newFormat],
      self._parsingPanelElmts.dataPanel,
      self._parsingPanelElmts.progressPanel,
      self._parsingPanelElmts.optionsContainer
    );
  };
  this._ensureFormatParserUIHasInitializationData(newFormat, installParserUI);
};

View File

@ -0,0 +1,13 @@
<div id="default-importing-progress-panel">
<div class="grid-layout layout-normal layout-full"><table>
<tr><td colspan="3" id="default-importing-progress-message"></td></tr>
<tr><td colspan="3">
<div id="default-importing-progress-bar-frame"><div id="default-importing-progress-bar-body"></div></div>
</td></tr>
<tr><td colspan="3">
<button class="button" id="default-importing-progress-cancel-button">Cancel</button>
<span id="default-importing-progress-timing"></span>
</td></tr>
</table></div>
<iframe id="default-importing-iframe" name="default-importing-iframe"></iframe>
</div>

View File

@ -0,0 +1,6 @@
<form bind="form"><div class="grid-layout layout-normal"><table>
<tr><td>Paste data from clipboard here:</td></tr>
<tr><td><textarea bind="textInput" name="clipboard" id="default-importing-clipboard-textarea"></textarea>
</td></tr>
<tr><td><button bind="nextButton" class="button button-primary" type="button">Next &raquo;</button></td></tr>
</table></div></form>

View File

@ -0,0 +1,5 @@
<form bind="form"><div class="grid-layout layout-normal"><table>
<tr><td>Locate one or more files on your computer to upload:</td></tr>
<tr><td><input type="file" multiple bind="fileInput" name="upload" /></td></tr>
<tr><td><button bind="nextButton" class="button button-primary" type="button">Next &raquo;</button></td></tr>
</table></div></form>

View File

@ -0,0 +1,8 @@
<form bind="form"><div class="grid-layout layout-normal"><table>
<tr><td colspan="2">Enter one or more web addresses (URLs) pointing to data to download:</td></tr>
<tr bind="urlRow"><td colspan="2"><input bind="urlInput" name="download" class="default-importing-web-url" /></td></tr>
<tr bind="buttons">
<td width="1"><button bind="addButton" class="button" type="button">Add Another URL</button></td>
<td><button bind="nextButton" class="button button-primary" type="button">Next &raquo;</button></td>
</tr>
</table></div></form>

View File

@ -0,0 +1,118 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Importing source that uploads files from the user's computer.
 * controller: the importing controller used to start the import job.
 */
function ThisComputerImportingSourceUI(controller) {
  this._controller = controller;
}

// Register this source with the default importing controller.
Refine.DefaultImportingController.sources.push({
  "label": "This Computer",
  "id": "upload",
  "uiClass": ThisComputerImportingSourceUI
});

/*
 * Renders the upload form into bodyDiv and wires its Next button: with at
 * least one file chosen the import job is started with the form, otherwise
 * the user is alerted.
 */
ThisComputerImportingSourceUI.prototype.attachUI = function(bodyDiv) {
  var self = this;

  bodyDiv.html(DOM.loadHTML("core", "scripts/index/default-importing-sources/import-from-computer-form.html"));
  this._elmts = DOM.bind(bodyDiv);

  this._elmts.nextButton.click(function(evt) {
    var chosenFiles = self._elmts.fileInput[0].files;
    if (chosenFiles.length !== 0) {
      self._controller.startImportJob(self._elmts.form, "Uploading data ...");
    } else {
      window.alert("You must specify a data file to import.");
    }
  });
};

/*
 * Nothing to focus for this source.
 */
ThisComputerImportingSourceUI.prototype.focus = function() {
};
/*
 * Importing source that downloads data from one or more web addresses.
 * controller: the importing controller used to start the import job.
 */
function UrlImportingSourceUI(controller) {
  this._controller = controller;
}

// Register this source with the default importing controller.
Refine.DefaultImportingController.sources.push({
  "label": "Web Addresses (URLs)",
  "id": "download",
  "uiClass": UrlImportingSourceUI
});

/*
 * Renders the URL form into bodyDiv and wires its buttons. Next starts the
 * import job unless the (first) URL input is blank, in which case the user is
 * alerted. "Add Another URL" inserts a copy of the URL row above the buttons.
 */
UrlImportingSourceUI.prototype.attachUI = function(bodyDiv) {
  var self = this;

  bodyDiv.html(DOM.loadHTML("core", "scripts/index/default-importing-sources/import-from-web-form.html"));
  this._elmts = DOM.bind(bodyDiv);

  this._elmts.nextButton.click(function(evt) {
    // Trim the text and then measure it. Trimming the length itself yields a
    // string such as "0", which never strictly equals the number 0.
    if ($.trim(self._elmts.urlInput[0].value).length === 0) {
      window.alert("You must specify a web address (URL) to import.");
    } else {
      self._controller.startImportJob(self._elmts.form, "Downloading data ...");
    }
  });
  this._elmts.addButton.click(function(evt) {
    self._elmts.buttons.before(self._elmts.urlRow.clone());
  });
};

/*
 * Moves keyboard focus to the URL input.
 */
UrlImportingSourceUI.prototype.focus = function() {
  this._elmts.urlInput.focus();
};
/*
 * Importing source that takes data pasted into a text area.
 * controller: the importing controller used to start the import job.
 */
function ClipboardImportingSourceUI(controller) {
  this._controller = controller;
}

// Register this source with the default importing controller.
Refine.DefaultImportingController.sources.push({
  "label": "Clipboard",
  "id": "clipboard",
  "uiClass": ClipboardImportingSourceUI
});

/*
 * Renders the clipboard form into bodyDiv and wires its Next button: with
 * non-blank pasted text the import job is started with the form, otherwise
 * the user is alerted.
 */
ClipboardImportingSourceUI.prototype.attachUI = function(bodyDiv) {
  var self = this;

  bodyDiv.html(DOM.loadHTML("core", "scripts/index/default-importing-sources/import-from-clipboard-form.html"));
  this._elmts = DOM.bind(bodyDiv);

  this._elmts.nextButton.click(function(evt) {
    var pastedText = $.trim(self._elmts.textInput[0].value);
    if (pastedText.length !== 0) {
      self._controller.startImportJob(self._elmts.form, "Uploading pasted data ...");
    } else {
      window.alert("You must paste some data to import.");
    }
  });
};

/*
 * Moves keyboard focus to the text area.
 */
ClipboardImportingSourceUI.prototype.focus = function() {
  this._elmts.textInput.focus();
};

View File

@ -1,7 +0,0 @@
<form bind="form"><div class="grid-layout layout-normal"><table>
<tr><td>File to import:</td></tr>
<tr><td><input type="file" bind="fileInput" name="project-file" />
<input type="hidden" name="project-name" bind="nameInput" />
</td></tr>
<tr><td><button bind="nextButton" class="button button-primary" type="button">Next &raquo;</button></td></tr>
</table></div></form>

View File

@ -0,0 +1,17 @@
<form id="project-upload-form" method="post" enctype="multipart/form-data" action="/command/core/import-project" accept-charset="UTF-8">
<div class="grid-layout layout-normal"><table>
<tr>
<td colspan="2">Locate an existing Google Refine project file (.tar or .tar.gz):</td>
</tr>
<tr>
<td>Project file:</td>
<td><input type="file" id="project-tar-file-input" name="project-file" /></td>
</tr>
<tr>
<td>Re-name project (optional):</td>
<td><input type="text" size="25" id="project-name-input" name="project-name" /></td></tr>
<tr>
<td></td>
<td><input type="submit" value="Import Project" id="import-project-button" class="button button-primary" /></td>
</tr>
</table></div>
</form>

View File

@ -0,0 +1,45 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Action area for importing an existing project: loads the import form
 * template into elmt and keeps the element and its bound children.
 */
Refine.ImportProjectUI = function(elmt) {
  var template = DOM.loadHTML("core", "scripts/index/import-project-ui.html");
  elmt.html(template);

  this._elmt = elmt;
  this._elmts = DOM.bind(elmt);
};

// Register the action area.
Refine.actionAreas.push({
  "id": "import-project",
  "label": "Import Project",
  "uiClass": Refine.ImportProjectUI
});

View File

@ -0,0 +1,4 @@
<div class="relative-frame">
<div bind="projectsContainer" id="projects-container"></div>
<div bind="workspaceControls" id="projects-workspace-controls"><a id="projects-workspace-open" href="javascript:{}" class="secondary">Browse workspace directory</a></div>
</div>

View File

@ -0,0 +1,274 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Action area listing the existing projects.
 *
 * Loads the template into elmt, keeps the projects container and the
 * workspace controls sized to the element on window resize, wires the
 * upload-related inputs and the "browse workspace directory" link, and
 * fetches the project list.
 */
Refine.OpenProjectUI = function(elmt) {
  var self = this;

  elmt.html(DOM.loadHTML("core", "scripts/index/open-project-ui.html"));

  this._elmt = elmt;
  this._elmts = DOM.bind(elmt);

  var resize = function() {
    var height = elmt.height();
    var width = elmt.width();
    var controlsHeight = self._elmts.workspaceControls.outerHeight();

    self._elmts.projectsContainer
      .css("height", (height - controlsHeight - DOM.getVPaddings(self._elmts.projectsContainer)) + "px");
    self._elmts.workspaceControls
      .css("bottom", "0px")
      .css("width", (width - DOM.getHPaddings(self._elmts.workspaceControls)) + "px");
  };
  $(window).resize(resize);
  window.setTimeout(resize, 100);

  $("#project-file-input").change(function() {
    if ($("#project-name-input")[0].value.length == 0) {
      // The file list can be empty (e.g. the chooser was cancelled), and the
      // standard property is File.name; fileName is a legacy alias kept only
      // as a fallback.
      var file = (this.files && this.files.length > 0) ? this.files[0] : null;
      var fileName = file ? (file.name || file.fileName) : null;
      if (fileName) {
        $("#project-name-input")[0].value = fileName.replace(/\.\w+/, "").replace(/[_-]/g, " ");
      }
      $("#project-name-input").focus().select();
    }
  }).keypress(function(evt) {
    // Enter key submits the upload.
    if (evt.keyCode == 13) {
      return self._onClickUploadFileButton(evt);
    }
  });

  $("#upload-file-button").click(function(evt) {
    return self._onClickUploadFileButton(evt);
  });

  $('#projects-workspace-open').click(function() {
    $.ajax({
      type: "POST",
      url: "/command/core/open-workspace-dir",
      dataType: "json",
      success: function (data) {
        if (data.code != "ok" && "message" in data) {
          alert(data.message);
        }
      }
    });
  });

  this._fetchProjects();
};
/*
 * Requests the metadata of all projects from the server and renders the
 * response with _renderProjects.
 */
Refine.OpenProjectUI.prototype._fetchProjects = function() {
  var self = this;
  var onMetadata = function(data) {
    self._renderProjects(data);
  };

  $.getJSON("/command/core/get-all-project-metadata", null, onMetadata, "json");
};
/*
 * Renders the list of projects into #projects-container.
 *
 * data.projects maps project ids to metadata objects; each one gets its id
 * and a parsed "date" (from its "modified" field) attached, and the projects
 * are listed most recently modified first. With no projects, a copy of the
 * #no-project-message element is shown instead. Each row carries rename and
 * delete links that are only visible while the mouse is over the row.
 */
Refine.OpenProjectUI.prototype._renderProjects = function(data) {
  var self = this;
  var projects = [];
  for (var n in data.projects) {
    if (data.projects.hasOwnProperty(n)) {
      var project = data.projects[n];
      project.id = n;
      project.date = Date.parseExact(project.modified, "yyyy-MM-ddTHH:mm:ssZ");
      projects.push(project);
    }
  }
  projects.sort(function(a, b) { return b.date.getTime() - a.date.getTime(); });

  var container = $("#projects-container").empty();
  if (!projects.length) {
    $("#no-project-message").clone().show().appendTo(container);
  } else {
    Refine.selectActionArea('open-project');

    var table = $(
      '<table class="list-table"><tr>' +
      '<th>Name</th>' +
      '<th></th>' +
      '<th></th>' +
      '<th align="right">Last&nbsp;modified</th>' +
      '</tr></table>'
    ).appendTo(container)[0];

    // Turns a modification date into a relative description such as
    // "today 3:15 PM", "3 days ago", "2 weeks ago", "a month ago".
    var formatDate = function(date) {
      var d = new Date(date);
      var last_year = Date.today().add({ years: -1 });
      var last_month = Date.today().add({ months: -1 });
      var last_week = Date.today().add({ days: -7 });
      var today = Date.today();
      var tomorrow = Date.today().add({ days: 1 });

      // Ages are computed from full dates rather than from day-of-year or
      // month numbers, which go negative across a year boundary (e.g. a
      // December project viewed in January). Rounding absorbs the hour
      // gained or lost on daylight-saving changes.
      var dayStart = new Date(d.getFullYear(), d.getMonth(), d.getDate());
      var daysAgo = Math.round((today.getTime() - dayStart.getTime()) / 86400000);
      var yearsAgo = today.getFullYear() - d.getFullYear();
      var monthsAgo = yearsAgo * 12 + (today.getMonth() - d.getMonth());

      if (d.between(today, tomorrow)) {
        return "today " + d.toString("h:mm tt");
      } else if (d.between(last_week, today)) {
        return (daysAgo <= 1) ? ("yesterday " + d.toString("h:mm tt")) : (daysAgo + " days ago");
      } else if (d.between(last_month, today)) {
        var weeksAgo = Math.floor(daysAgo / 7);
        return (weeksAgo == 1) ? "a week ago" : weeksAgo.toFixed(0) + " weeks ago";
      } else if (d.between(last_year, today)) {
        return (monthsAgo == 1) ? "a month ago" : monthsAgo + " months ago";
      } else {
        return (yearsAgo == 1) ? "a year ago" : yearsAgo + " years ago";
      }
    };

    var renderProject = function(project) {
      var tr = table.insertRow(table.rows.length);
      tr.className = "project";

      var nameLink = $('<a></a>')
        .addClass("list-table-itemname")
        .text(project.name)
        .attr("href", "/project?project=" + project.id)
        .appendTo(tr.insertCell(tr.cells.length));

      var renameLink = $('<a></a>')
        .text("rename")
        .addClass("secondary")
        .attr("href", "javascript:{}")
        .css("visibility", "hidden")
        .click(function() {
          var name = window.prompt("New project name:", project.name);
          if (name == null) {
            return;
          }

          name = $.trim(name);
          if (project.name == name || name.length == 0) {
            return;
          }

          $.ajax({
            type: "POST",
            url: "/command/core/rename-project",
            data: { "project" : project.id, "name" : name },
            dataType: "json",
            success: function (data) {
              if (data && typeof data.code != 'undefined' && data.code == "ok") {
                nameLink.text(name);
              } else {
                // data itself may be missing here, so do not dereference it
                // unguarded.
                var reason = (data && data.message) ? data.message : "Unknown error";
                alert("Failed to rename project: " + reason);
              }
            }
          });
        }).appendTo(tr.insertCell(tr.cells.length));

      var deleteLink = $('<a></a>')
        .addClass("delete-project")
        .attr("title","Delete this project")
        .attr("href","")
        .css("visibility", "hidden")
        .html("<img src='/images/close.png' />")
        .click(function() {
          if (window.confirm("Are you sure you want to delete project \"" + project.name + "\"?")) {
            $.ajax({
              type: "POST",
              url: "/command/core/delete-project",
              data: { "project" : project.id },
              dataType: "json",
              success: function (data) {
                if (data && typeof data.code != 'undefined' && data.code == "ok") {
                  self._fetchProjects();
                }
              }
            });
          }
          // Prevent the empty href from navigating.
          return false;
        }).appendTo(tr.insertCell(tr.cells.length));

      $('<div></div>')
        .html(formatDate(project.date))
        .addClass("last-modified")
        .attr("title", project.date.toString())
        .appendTo(tr.insertCell(tr.cells.length));

      $(tr).mouseenter(function() {
        renameLink.css("visibility", "visible");
        deleteLink.css("visibility", "visible");
      }).mouseleave(function() {
        renameLink.css("visibility", "hidden");
        deleteLink.css("visibility", "hidden");
      });
    };

    for (var i = 0; i < projects.length; i++) {
      renderProject(projects[i]);
    }
  }
};
/*
 * Validates the upload form and, when it is valid, sets the form's action
 * URL so that the parsing options travel in the query string.
 *
 * Returns true to let the submission proceed. Otherwise alerts the user,
 * cancels the event and returns false. A project name is required, and so is
 * either a chosen file or a data URL.
 */
Refine.OpenProjectUI.prototype._onClickUploadFileButton = function(evt) {
  var projectName = $("#project-name-input")[0].value;
  var dataURL = $.trim($("#project-url-input")[0].value);
  if (! $.trim(projectName).length) {
    window.alert("You must specify a project name.");
  } else if ($("#project-file-input")[0].files.length === 0 && ! dataURL.length) {
    window.alert("You must specify a data file to upload or a URL to retrieve.");
  } else {
    // Every value is percent-encoded: free-text inputs such as the separator
    // may contain "&", "=", "#" or whitespace, and escape() does not produce
    // valid UTF-8 percent-encoding for non-ASCII URLs.
    var options = [
      [ "url", dataURL ],
      [ "split-into-columns", $("#split-into-columns-input")[0].checked ],
      [ "separator", $("#separator-input")[0].value ],
      [ "ignore", $("#ignore-input")[0].value ],
      [ "header-lines", $("#header-lines-input")[0].value ],
      [ "skip", $("#skip-input")[0].value ],
      [ "limit", $("#limit-input")[0].value ],
      [ "guess-value-type", $("#guess-value-type-input")[0].checked ],
      [ "ignore-quotes", $("#ignore-quotes-input")[0].checked ]
    ];
    var pairs = [];
    for (var i = 0; i < options.length; i++) {
      pairs.push(options[i][0] + "=" + encodeURIComponent(String(options[i][1])));
    }

    $("#file-upload-form").attr("action",
      "/command/core/create-project-from-upload?" + pairs.join("&"));

    return true;
  }

  evt.preventDefault();
  return false;
};
// Register the "Open Project" action area.
Refine.actionAreas.push({
  "id": "open-project",
  "label": "Open Project",
  "uiClass": Refine.OpenProjectUI
});

View File

@ -0,0 +1,36 @@
<div class="grid-layout layout-loose layout-full"><table>
<tr>
<td colspan="2"></td>
<td><div class="grid-layout layout-tighter layout-full"><table>
<tr>
<td style="text-align: right;">&nbsp;</td>
<td width="1%"><button class="button" bind="previewButton">Update&nbsp;Preview</button></td>
</tr>
</table></div></td>
</tr>
<tr>
<td><div class="grid-layout layout-tightest"><table bind="sheetRecordContainer">
<tr><td colspan="3">Worksheets to Import</td></tr>
</table></div></td>
<td><div class="grid-layout layout-tightest"><table>
<tr><td width="1%"><input type="checkbox" bind="ignoreCheckbox" /></td><td>Ignore first</td>
<td><input bind="ignoreInput" type="text" class="lightweight" size="2" value="0" /> line(s) at beginning of file</td></tr>
<tr><td width="1%"><input type="checkbox" bind="headerLinesCheckbox" /></td><td>Parse next</td>
<td><input bind="headerLinesInput" type="text" class="lightweight" size="2" value="1" /> line(s) as column headers</td></tr>
<tr><td width="1%"><input type="checkbox" bind="skipCheckbox" /></td><td>Discard initial</td>
<td><input bind="skipInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
</table></div></td>
<td><div class="grid-layout layout-tightest"><table>
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" /></td>
<td colspan="2">Store blank rows</td></tr>
<tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" /></td>
<td colspan="2">Store blank cells as nulls</td></tr>
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
<td>Store file source<br/>(file names, URLs)<br/>in each row</td></tr>
</table></div></td>
</tr>
</table></div>

View File

@ -0,0 +1,198 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * Parser UI for Excel imports: keeps the importing context it is given,
 * builds the option panel and immediately requests a first preview.
 *
 * @param controller importing controller used to push options and fetch preview data
 * @param jobID identifier of the importing job
 * @param job the importing job object
 * @param format the format this UI was created for
 * @param config initial parser configuration (sheet records, line counts, flags)
 * @param dataContainerElmt jQuery element that receives the preview table
 * @param progressContainerElmt jQuery element shown while a preview is loading
 * @param optionContainerElmt jQuery element that receives the option panel
 */
Refine.ExcelParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {

  // Importing context.
  this._controller = controller;
  this._jobID = jobID;
  this._job = job;
  this._format = format;
  this._config = config;

  // Page regions this UI draws into.
  this._optionContainer = optionContainerElmt;
  this._progressContainer = progressContainerElmt;
  this._dataContainer = dataContainerElmt;

  // Handle of the pending delayed preview refresh, if any.
  this._timerID = null;

  this._initialize();
  this._updatePreview();
};

// Make this UI discoverable by name from the default importing controller.
Refine.DefaultImportingController.parserUIs["ExcelParserUI"] = Refine.ExcelParserUI;
/**
 * Cancels the pending delayed preview refresh, if one is scheduled.
 */
Refine.ExcelParserUI.prototype.dispose = function() {
  var pendingTimer = this._timerID;
  if (pendingTimer == null) {
    return;
  }
  window.clearTimeout(pendingTimer);
  this._timerID = null;
};
/**
 * Reports whether the project can be created with the current options.
 *
 * @return {boolean} always true; this UI imposes no preconditions
 */
Refine.ExcelParserUI.prototype.confirmReadyToCreateProject = function() {
  var ready = true; // nothing to validate
  return ready;
};
/**
 * Collects the parser options currently selected in the option panel.
 *
 * @return {Object} options with xmlBased (copied from the config), sheets
 *   (indexes of the checked worksheets), ignoreLines, headerLines,
 *   skipDataLines, limit, storeBlankRows, storeBlankCellsAsNulls and
 *   includeFileSources.
 */
Refine.ExcelParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;
  var options = {
    xmlBased: this._config.xmlBased,
    sheets: []
  };

  // Always parse as decimal: without an explicit radix, older engines
  // interpret a leading zero ("010") as octal.
  var parseIntDefault = function(s, def) {
    var n = parseInt(s, 10);
    return isNaN(n) ? def : n;
  };

  // A numeric option only counts when its checkbox is ticked; when it is not,
  // or when the text is not a number, the fallback value is reported.
  var gatedInt = function(checkbox, input, fallback) {
    return checkbox[0].checked ? parseIntDefault(input[0].value, fallback) : fallback;
  };

  // Each worksheet checkbox carries its sheet index in the "index" attribute.
  elmts.sheetRecordContainer.find('input').each(function() {
    if (this.checked) {
      options.sheets.push(parseInt(this.getAttribute('index'), 10));
    }
  });

  options.ignoreLines = gatedInt(elmts.ignoreCheckbox, elmts.ignoreInput, -1);
  options.headerLines = gatedInt(elmts.headerLinesCheckbox, elmts.headerLinesInput, 0);
  options.skipDataLines = gatedInt(elmts.skipCheckbox, elmts.skipInput, 0);
  options.limit = gatedInt(elmts.limitCheckbox, elmts.limitInput, -1);

  options.storeBlankRows = elmts.storeBlankRowsCheckbox[0].checked;
  options.storeBlankCellsAsNulls = elmts.storeBlankCellsAsNullsCheckbox[0].checked;
  options.includeFileSources = elmts.includeFileSourcesCheckbox[0].checked;

  return options;
};
/**
 * Builds the option panel: loads its HTML, lists the worksheets from the
 * configuration, pre-fills the controls from the configuration and wires
 * change events to a delayed preview refresh.
 */
Refine.ExcelParserUI.prototype._initialize = function() {
  var self = this;
  var config = this._config;

  this._optionContainer.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/parser-interfaces/excel-parser-ui.html"));
  this._optionContainerElmts = DOM.bind(this._optionContainer);
  var elmts = this._optionContainerElmts;

  elmts.previewButton.click(function() { self._updatePreview(); });

  // One table row per worksheet: checkbox (tagged with the sheet index),
  // sheet name and row count.
  var sheetTable = elmts.sheetRecordContainer[0];
  $.each(config.sheetRecords, function(i, sheetRecord) {
    var tr = sheetTable.insertRow(sheetTable.rows.length);
    var td0 = $(tr.insertCell(0)).attr('width', '1%');
    var checkbox = $('<input>')
      .attr('type', 'checkbox')
      .attr('index', i)
      .appendTo(td0);
    if (sheetRecord.selected) {
      checkbox.attr('checked', 'true');
    }
    $(tr.insertCell(1)).text(sheetRecord.name);
    $(tr.insertCell(2)).text(sheetRecord.rows + ' rows');
  });

  // Ticks the checkbox and shows the number, but only for positive values;
  // otherwise the control keeps the default from the HTML.
  var fillNumeric = function(checkbox, input, value) {
    if (value > 0) {
      checkbox.attr("checked", "checked");
      input[0].value = value.toString();
    }
  };
  fillNumeric(elmts.ignoreCheckbox, elmts.ignoreInput, config.ignoreLines);
  fillNumeric(elmts.headerLinesCheckbox, elmts.headerLinesInput, config.headerLines);
  fillNumeric(elmts.limitCheckbox, elmts.limitInput, config.limit);
  // The original wrote to skipInput.value[0].value, which throws because a
  // jQuery object has no "value" property; the DOM element is skipInput[0].
  fillNumeric(elmts.skipCheckbox, elmts.skipInput, config.skipDataLines);

  if (config.storeBlankRows) {
    elmts.storeBlankRowsCheckbox.attr("checked", "checked");
  }
  if (config.storeBlankCellsAsNulls) {
    elmts.storeBlankCellsAsNullsCheckbox.attr("checked", "checked");
  }
  if (config.includeFileSources) {
    elmts.includeFileSourcesCheckbox.attr("checked", "checked");
  }

  var onChange = function() {
    self._scheduleUpdatePreview();
  };
  this._optionContainer.find("input").bind("change", onChange);
  this._optionContainer.find("select").bind("change", onChange);
};
/**
 * (Re)starts the half-second countdown after which the preview is refreshed.
 * Any countdown already running is cancelled first, so a burst of changes
 * leads to a single refresh.
 */
Refine.ExcelParserUI.prototype._scheduleUpdatePreview = function() {
  var ui = this;

  if (ui._timerID != null) {
    window.clearTimeout(ui._timerID);
    ui._timerID = null;
  }

  var fire = function() {
    ui._timerID = null;
    ui._updatePreview();
  };
  ui._timerID = window.setTimeout(fire, 500); // 0.5 second
};
/**
 * Sends the current options to the controller and, when it answers with
 * status "ok", fetches the preview data and renders it into the data
 * container. The progress indicator is shown up front and hidden once the
 * preview data has arrived.
 */
Refine.ExcelParserUI.prototype._updatePreview = function() {
  var ui = this;

  var showPreview = function(projectData) {
    ui._progressContainer.hide();
    new Refine.PreviewTable(projectData, ui._dataContainer.unbind().empty());
  };

  var onOptionsUpdated = function(result) {
    if (result.status != "ok") {
      return;
    }
    ui._controller.getPreviewData(showPreview);
  };

  ui._progressContainer.show();
  ui._controller.updateFormatAndOptions(ui.getOptions(), onOptionsUpdated);
};

View File

@ -0,0 +1,51 @@
<!--
Option panel for the fixed-width parser UI. Elements carrying a bind="..."
attribute are looked up by name from Refine.FixedWidthParserUI; the two
row-separator radio buttons are located by their name/value instead.
NOTE(review): encodingSelect is present here but is not read by the
FixedWidthParserUI code visible alongside this template; confirm whether it is
populated elsewhere.
-->
<div class="grid-layout layout-loose layout-full"><table>
<tr>
<td><div class="grid-layout layout-tighter"><table>
<tr>
<td width="1%">Character&nbsp;encoding</td>
<td><select bind="encodingSelect"></select></td>
</tr>
</table></div></td>
<td colspan="2"><div class="grid-layout layout-tighter layout-full"><table>
<tr>
<td style="text-align: right;">&nbsp;</td>
<td width="1%"><button class="button" bind="previewButton">Update&nbsp;Preview</button></td>
</tr>
</table></div></td>
</tr>
<tr>
<td colspan="3"><div class="grid-layout layout-tightest"><table>
<tr><td>Column widths:</td><td><input style="width: 40em;" bind="columnWidthsInput" /></td><td>comma separated numbers</td></tr>
<tr><td>Column names:</td><td><input style="width: 40em;" bind="columnNamesInput" /></td><td>optional, comma separated</td></tr>
</table></div></td>
</tr>
<tr>
<td><div class="grid-layout layout-tightest"><table>
<tr><td colspan="2">Rows are separated by</td></tr>
<tr><td width="1%"><input type="radio" name="row-separator" value="new-line" /></td><td>new line characters \n</td></tr>
<tr><td width="1%"><input type="radio" name="row-separator" value="custom" /></td><td>custom
<input bind="rowSeparatorInput" type="text" class="lightweight" size="5" /></td></tr>
<tr><td colspan="2">Escape special characters with \</td></tr>
</table></div></td>
<td><div class="grid-layout layout-tightest"><table>
<tr><td width="1%"><input type="checkbox" bind="ignoreCheckbox" /></td><td>Ignore first</td>
<td><input bind="ignoreInput" type="text" class="lightweight" size="2" value="0" /> line(s) at beginning of file</td></tr>
<tr><td width="1%"><input type="checkbox" bind="headerLinesCheckbox" /></td><td>Parse next</td>
<td><input bind="headerLinesInput" type="text" class="lightweight" size="2" value="1" /> line(s) as column headers</td></tr>
<tr><td width="1%"><input type="checkbox" bind="skipCheckbox" /></td><td>Discard initial</td>
<td><input bind="skipInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
</table></div></td>
<td><div class="grid-layout layout-tightest"><table>
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" /></td>
<td>Parse cell text into<br/>numbers, dates, ...</td></tr>
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" /></td>
<td colspan="2">Store blank rows</td></tr>
<tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" /></td>
<td colspan="2">Store blank cells as nulls</td></tr>
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
<td>Store file source<br/>(file names, URLs)<br/>in each row</td></tr>
</table></div></td>
</tr>
</table></div>

View File

@ -0,0 +1,471 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * Parser UI for fixed-width text imports: keeps the importing context it is
 * given, builds the option panel and immediately requests a first preview.
 *
 * @param controller importing controller used to push options and fetch preview data
 * @param jobID identifier of the importing job
 * @param job the importing job object
 * @param format the format this UI was created for
 * @param config initial parser configuration (column widths, separators, flags)
 * @param dataContainerElmt jQuery element that receives the preview table
 * @param progressContainerElmt jQuery element shown while a preview is loading
 * @param optionContainerElmt jQuery element that receives the option panel
 */
Refine.FixedWidthParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {

  // Importing context.
  this._controller = controller;
  this._jobID = jobID;
  this._job = job;
  this._format = format;
  this._config = config;

  // Page regions this UI draws into.
  this._optionContainer = optionContainerElmt;
  this._progressContainer = progressContainerElmt;
  this._dataContainer = dataContainerElmt;

  // Handle of the pending delayed preview refresh, if any.
  this._timerID = null;

  this._initialize();
  this.updatePreview();
};

// Make this UI discoverable by name from the default importing controller.
Refine.DefaultImportingController.parserUIs["FixedWidthParserUI"] = Refine.FixedWidthParserUI;
/**
 * Turns a separator into its displayable, backslash-escaped form:
 * backslash becomes \\, newline becomes \n and tab becomes \t.
 *
 * Uses global regular expressions so that every occurrence is escaped; a
 * string pattern passed to String.replace only rewrites the first match.
 * Backslashes are escaped first so that the backslashes introduced for
 * newline and tab are not escaped a second time.
 *
 * @param {string} s raw separator
 * @return {string} escaped separator
 */
Refine.FixedWidthParserUI.encodeSeparator = function(s) {
  return s.replace(/\\/g, "\\\\")
    .replace(/\n/g, "\\n")
    .replace(/\t/g, "\\t");
};
/**
 * Reverses encodeSeparator: \n becomes a newline, \t a tab and \\ a single
 * backslash. Any other character sequence is left untouched.
 *
 * All escapes are resolved in a single left-to-right pass. Sequential
 * replaces would both miss repeated escapes (string patterns match only
 * once) and mis-read an escaped backslash followed by "n" or "t" as a
 * newline or tab escape.
 *
 * @param {string} s escaped separator as typed by the user
 * @return {string} raw separator
 */
Refine.FixedWidthParserUI.decodeSeparator = function(s) {
  return s.replace(/\\([nt\\])/g, function(match, escaped) {
    switch (escaped) {
      case "n":
        return "\n";
      case "t":
        return "\t";
      default:
        return "\\";
    }
  });
};
/**
 * Cancels the pending delayed preview refresh, if one is scheduled.
 */
Refine.FixedWidthParserUI.prototype.dispose = function() {
  var pendingTimer = this._timerID;
  if (pendingTimer == null) {
    return;
  }
  window.clearTimeout(pendingTimer);
  this._timerID = null;
};
/**
 * Reports whether the project can be created with the current options.
 *
 * @return {boolean} always true; this UI imposes no preconditions
 */
Refine.FixedWidthParserUI.prototype.confirmReadyToCreateProject = function() {
  var ready = true; // nothing to validate
  return ready;
};
/**
 * Collects the parser options currently selected in the option panel.
 *
 * @return {Object} options with columnWidths (a copy of the configured
 *   widths), optionally columnNames, lineSeparator, ignoreLines, headerLines,
 *   skipDataLines, limit, guessCellValueTypes, storeBlankRows,
 *   storeBlankCellsAsNulls and includeFileSources.
 */
Refine.FixedWidthParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;
  var options = {
    columnWidths: [].concat(this._config.columnWidths)
  };

  // Column names are optional; an empty field splits into [""] and is ignored.
  var columnNames = $.trim(elmts.columnNamesInput[0].value).replace(/,\s+/g, ',').split(',');
  if (columnNames.length > 0 && columnNames[0].length > 0) {
    options.columnNames = columnNames;
  }

  switch (this._optionContainer.find("input[name='row-separator']:checked")[0].value) {
    case 'new-line':
      options.lineSeparator = "\n";
      break;
    default:
      // Custom separator: the text field holds the backslash-escaped form.
      options.lineSeparator = Refine.FixedWidthParserUI.decodeSeparator(
          elmts.rowSeparatorInput[0].value);
  }

  // Always parse as decimal: without an explicit radix, older engines
  // interpret a leading zero ("010") as octal.
  var parseIntDefault = function(s, def) {
    var n = parseInt(s, 10);
    return isNaN(n) ? def : n;
  };

  // A numeric option only counts when its checkbox is ticked; when it is not,
  // or when the text is not a number, the fallback value is reported.
  var gatedInt = function(checkbox, input, fallback) {
    return checkbox[0].checked ? parseIntDefault(input[0].value, fallback) : fallback;
  };

  options.ignoreLines = gatedInt(elmts.ignoreCheckbox, elmts.ignoreInput, -1);
  options.headerLines = gatedInt(elmts.headerLinesCheckbox, elmts.headerLinesInput, 0);
  options.skipDataLines = gatedInt(elmts.skipCheckbox, elmts.skipInput, 0);
  options.limit = gatedInt(elmts.limitCheckbox, elmts.limitInput, -1);

  options.guessCellValueTypes = elmts.guessCellValueTypesCheckbox[0].checked;
  options.storeBlankRows = elmts.storeBlankRowsCheckbox[0].checked;
  options.storeBlankCellsAsNulls = elmts.storeBlankCellsAsNullsCheckbox[0].checked;
  options.includeFileSources = elmts.includeFileSourcesCheckbox[0].checked;

  return options;
};
/**
 * Builds the option panel: loads its HTML, pre-fills the controls from the
 * configuration and wires change events to a delayed preview refresh.
 */
Refine.FixedWidthParserUI.prototype._initialize = function() {
  var self = this;
  var config = this._config;

  this._optionContainer.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/parser-interfaces/fixed-width-parser-ui.html"));
  this._optionContainerElmts = DOM.bind(this._optionContainer);
  var elmts = this._optionContainerElmts;

  // This class exposes updatePreview (no underscore); the original handler
  // called the non-existent _updatePreview and failed on every click.
  elmts.previewButton.click(function() { self.updatePreview(); });

  elmts.columnWidthsInput[0].value = config.columnWidths.join(',');
  if ('columnNames' in config) {
    elmts.columnNamesInput[0].value = config.columnNames.join(',');
  }

  // Select the matching radio button; the custom field always shows the
  // escaped form of the configured separator.
  var rowSeparatorValue = (config.lineSeparator == "\n") ? 'new-line' : 'custom';
  this._optionContainer.find(
      "input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
  elmts.rowSeparatorInput[0].value =
      Refine.FixedWidthParserUI.encodeSeparator(config.lineSeparator);

  // Ticks the checkbox and shows the number, but only for positive values;
  // otherwise the control keeps the default from the HTML.
  var fillNumeric = function(checkbox, input, value) {
    if (value > 0) {
      checkbox.attr("checked", "checked");
      input[0].value = value.toString();
    }
  };
  fillNumeric(elmts.ignoreCheckbox, elmts.ignoreInput, config.ignoreLines);
  fillNumeric(elmts.headerLinesCheckbox, elmts.headerLinesInput, config.headerLines);
  fillNumeric(elmts.limitCheckbox, elmts.limitInput, config.limit);
  // The original wrote to skipInput.value[0].value, which throws because a
  // jQuery object has no "value" property; the DOM element is skipInput[0].
  fillNumeric(elmts.skipCheckbox, elmts.skipInput, config.skipDataLines);

  if (config.storeBlankRows) {
    elmts.storeBlankRowsCheckbox.attr("checked", "checked");
  }
  if (config.guessCellValueTypes) {
    elmts.guessCellValueTypesCheckbox.attr("checked", "checked");
  }
  if (config.storeBlankCellsAsNulls) {
    elmts.storeBlankCellsAsNullsCheckbox.attr("checked", "checked");
  }
  if (config.includeFileSources) {
    elmts.includeFileSourcesCheckbox.attr("checked", "checked");
  }

  var onChange = function() {
    self._scheduleUpdatePreview();
  };
  this._optionContainer.find("input").bind("change", onChange);
  this._optionContainer.find("select").bind("change", onChange);

  // Column widths live in the configuration rather than being re-read from
  // the text field, so a valid edit has to be copied over before the preview
  // refreshes. Input containing a non-numeric entry is ignored as a whole.
  elmts.columnWidthsInput.bind("change", function() {
    var newColumnWidths = [];
    var a = $.trim(this.value).replace(/,\s+/g, ',').split(',');
    for (var i = 0; i < a.length; i++) {
      var n = parseInt(a[i], 10); // decimal, regardless of leading zeros
      if (isNaN(n)) {
        return;
      }
      newColumnWidths.push(n);
    }
    self._config.columnWidths = newColumnWidths;
    onChange();
  });
  elmts.columnNamesInput.bind("change", onChange);
};
/**
 * (Re)starts the half-second countdown after which the preview is refreshed.
 * Any countdown already running is cancelled first, so a burst of changes
 * leads to a single refresh.
 */
Refine.FixedWidthParserUI.prototype._scheduleUpdatePreview = function() {
  var ui = this;

  if (ui._timerID != null) {
    window.clearTimeout(ui._timerID);
    ui._timerID = null;
  }

  var fire = function() {
    ui._timerID = null;
    ui.updatePreview();
  };
  ui._timerID = window.setTimeout(fire, 500); // 0.5 second
};
/**
 * Sends the current options to the controller and, when it answers with
 * status "ok", fetches preview data (passing 20 as the second argument of
 * getPreviewData) and renders it as a fixed-width preview table. The
 * progress indicator is shown up front and hidden after rendering.
 */
Refine.FixedWidthParserUI.prototype.updatePreview = function() {
  var ui = this;

  var showPreview = function(projectData) {
    new Refine.FixedWidthPreviewTable(ui, ui._config, projectData, ui._dataContainer);
    ui._progressContainer.hide();
  };

  var onOptionsUpdated = function(result) {
    if (result.status != "ok") {
      return;
    }
    ui._controller.getPreviewData(showPreview, 20);
  };

  ui._progressContainer.show();

  var previewOptions = ui.getOptions();
  // The preview must show the exact text so the column split points are
  // easy to see; value-type guessing is therefore switched off here.
  previewOptions.guessCellValueTypes = false;

  ui._controller.updateFormatAndOptions(previewOptions, onOptionsUpdated);
};
/**
 * Preview table for fixed-width data; renders itself on construction.
 *
 * @param parserUI the Refine.FixedWidthParserUI that owns this preview
 * @param config parser configuration; its columnWidths are read and rewritten
 * @param projectData preview data (column model and rows) to display
 * @param elmt jQuery element the preview is rendered into
 */
Refine.FixedWidthPreviewTable = function(parserUI, config, projectData, elmt) {
  this._elmt = elmt;
  this._projectData = projectData;
  this._config = config;
  this._parserUI = parserUI;

  this._render();
};
/**
 * Rebuilds the preview inside this._elmt: a data table showing the preview
 * rows, overlaid with one separator per configured column boundary. An
 * existing separator can be dragged or removed via its inner "close" element,
 * and a floating separator that follows the mouse adds a new boundary on
 * mousedown. Every such edit rewrites the configured column widths and asks
 * the owning parser UI to refresh. The element's scroll offsets are captured
 * first and restored at the end.
 */
Refine.FixedWidthPreviewTable.prototype._render = function() {
// Captured before the element is emptied; restored on the last two lines.
var scrollTop = this._elmt[0].scrollTop;
var scrollLeft = this._elmt[0].scrollLeft;
this._elmt.unbind().empty();
var self = this;
var container = $('<div>')
.addClass('fixed-width-preview-container')
.appendTo(this._elmt);
var table = $('<table>')
.addClass("data-table")
.addClass("fixed-width-preview-data-table")
.appendTo(container)[0];
var columns = this._projectData.columnModel.columns;
// Copy, so the configured array itself is not touched while rendering.
var columnWidths = [].concat(this._config.columnWidths);
// Appends a cell to the row and tags it 'even' or 'odd' by its position.
var addCell = function(tr) {
var index = tr.cells.length;
var td = tr.insertCell(index);
td.className = (index % 2 == 0) ? 'even' : 'odd';
return td;
};
/*------------------------------------------------------------
* Column Headers
*------------------------------------------------------------
*/
var trHead = table.insertRow(table.rows.length);
$(addCell(trHead)).addClass("column-header").html('&nbsp;'); // index
// Header text is cut to the column's configured width (in characters); the
// full name stays available in the title attribute.
var createColumnHeader = function(column, index) {
var name = column.name;
if (index < columnWidths.length) {
name = name.slice(0, columnWidths[index]);
}
$(addCell(trHead))
.addClass("column-header")
.text(name)
.attr('title', column.name);
};
for (var i = 0; i < columns.length; i++) {
createColumnHeader(columns[i], i);
}
/*------------------------------------------------------------
* Data Cells
*------------------------------------------------------------
*/
var rows = this._projectData.rowModel.rows;
// One row-number cell followed by one cell per column. Missing or null cells
// show a non-breaking space, cells with "e" show the error text, and cells
// with a truthy "r" get an empty content div.
var renderRow = function(tr, r, row) {
var tdIndex = addCell(tr);
$('<div></div>').html((row.i + 1) + ".").appendTo(tdIndex);
var cells = row.cells;
for (var i = 0; i < columns.length; i++) {
var column = columns[i];
var td = addCell(tr);
var divContent = $('<div/>').addClass("data-table-cell-content").appendTo(td);
var cell = (column.cellIndex < cells.length) ? cells[column.cellIndex] : null;
if (!cell || ("v" in cell && cell.v === null)) {
$('<span>').html("&nbsp;").appendTo(divContent);
} else if ("e" in cell) {
$('<span>').addClass("data-table-error").text(cell.e).appendTo(divContent);
} else if (!("r" in cell) || !cell.r) {
if (typeof cell.v !== "string") {
if (typeof cell.v == "number") {
divContent.addClass("data-table-cell-content-numeric");
}
$('<span>')
.addClass("data-table-value-nonstring")
.text(cell.v)
.appendTo(divContent);
} else if (URL.looksLikeUrl(cell.v)) {
$('<a>')
.text(cell.v)
.attr("href", cell.v)
.attr("target", "_blank")
.appendTo(divContent);
} else {
$('<span>').text(cell.v).appendTo(divContent);
}
}
}
};
for (var r = 0; r < rows.length; r++) {
var row = rows[r];
renderRow(table.insertRow(table.rows.length), r, row);
}
// Left position of the first data column (header cell 0 is the row index).
var pixelOffset = $(trHead.cells[1]).position().left;
// Estimate the width of one character by measuring a 50-character string in a
// temporary div placed above the visible area.
// NOTE(review): assumes that div is styled with the same font as the table
// cells - confirm against the stylesheet.
var testString = '01234567890123456789012345678901234567890123456789';
var testDiv = $('<div>')
.css('position', 'absolute')
.css('top', '-100px')
.text(testString)
.appendTo(container);
var pixelsPerChar = testDiv.width() / testString.length;
testDiv.remove();
var columnSeparators = [];
// Character index of each separator; updatePreview turns these into widths.
var columnCharIndexes = [];
// Places a separator at the given character index, shifted left by half of
// the value DOM.getHPaddings reports for it (presumably its total horizontal
// padding - verify in DOM).
var positionColumnSeparator = function(outer, charIndex) {
outer.css('left',
Math.round(pixelOffset + charIndex * pixelsPerChar - DOM.getHPaddings(outer) / 2) + 'px');
};
// Converts the mouse's page X coordinate into the nearest character index.
var computeCharIndex = function(evt) {
var offset = evt.pageX - container.offset().left;
return Math.round((offset - pixelOffset) / pixelsPerChar);
};
// Sorts the separator positions, converts them into column widths (dropping
// widths that are not positive), stores them in the configuration and in the
// widths text field, then asks the parser UI to refresh the preview.
var updatePreview = function() {
columnCharIndexes.sort(function(a, b) { return a - b; });
var newColumnWidths = [];
for (var i = 0; i < columnCharIndexes.length; i++) {
var charIndex = columnCharIndexes[i];
var columnWidth = (i == 0) ? charIndex : (charIndex - columnCharIndexes[i - 1]);
if (columnWidth > 0) {
newColumnWidths.push(columnWidth);
}
}
self._config.columnWidths = newColumnWidths;
self._parserUI._optionContainerElmts.columnWidthsInput[0].value = newColumnWidths.join(',');
self._parserUI.updatePreview();
};
// Floating separator that tracks the mouse (see the container handlers
// below); a mousedown on it adds a new column boundary.
var newSeparator = $('<div>')
.addClass('fixed-width-preview-column-separator-outer')
.append($('<div>').addClass('fixed-width-preview-column-separator-inner'))
.appendTo(container);
// Creates the separator for an existing boundary and wires up dragging and
// removal.
var createColumnSeparator = function(charIndex, index) {
columnCharIndexes[index] = charIndex;
var outer = $('<div>')
.addClass('fixed-width-preview-column-separator-outer')
.appendTo(container);
var inner = $('<div>')
.addClass('fixed-width-preview-column-separator-inner')
.appendTo(outer);
var close = $('<div>').appendTo(inner);
positionColumnSeparator(outer, charIndex);
// Hide the floating separator while the mouse is over an existing one.
outer.mouseover(function() {
newSeparator.hide();
})
.mouseout(function() {
newSeparator.show();
})
.mousedown(function() {
// Drag: follow the mouse until mouseup on the container, then commit the new
// position and refresh.
var mouseMove = function(evt) {
var newCharIndex = computeCharIndex(evt);
positionColumnSeparator(outer, newCharIndex);
evt.preventDefault();
evt.stopPropagation();
return false;
};
var mouseUp = function(evt) {
container.unbind('mousemove', mouseMove);
container.unbind('mouseup', mouseUp);
var newCharIndex = computeCharIndex(evt);
positionColumnSeparator(outer, newCharIndex);
columnCharIndexes[index] = newCharIndex;
updatePreview();
evt.preventDefault();
evt.stopPropagation();
return false;
};
container.bind('mousemove', mouseMove);
container.bind('mouseup', mouseUp);
});
// Removal: move this separator onto the previous one (or to 0), which yields
// a zero width that updatePreview drops.
close.click(function() {
columnCharIndexes[index] = index > 0 ? columnCharIndexes[index - 1] : 0;
updatePreview();
});
};
// Separators sit at the running sum of the configured column widths.
var charOffset = 0;
for (var i = 0; i < columnWidths.length; i++) {
var columnWidth = columnWidths[i];
createColumnSeparator(charOffset + columnWidth, i);
charOffset += columnWidth;
}
container
.mouseout(function(evt) {
newSeparator.hide();
})
.mousemove(function(evt) {
var offset = evt.pageX - container.offset().left;
var newCharIndex = Math.round((offset - pixelOffset) / pixelsPerChar);
positionColumnSeparator(newSeparator.show(), newCharIndex);
});
newSeparator.mousedown(function(evt) {
var newCharIndex = computeCharIndex(evt);
columnCharIndexes.push(newCharIndex);
updatePreview();
evt.preventDefault();
evt.stopPropagation();
return false;
});
this._elmt[0].scrollTop = scrollTop;
this._elmt[0].scrollLeft = scrollLeft;
};

View File

@ -0,0 +1,4 @@
<!--
Record-node picker shown in the data area by the JSON parser UI. The JSON tree
is rendered by script into the element bound as "domContainer".
-->
<div class="json-parser-ui-select-message">
Click on the first JSON { } node corresponding to the first record to load.
</div>
<div class="json-parser-ui-select-dom" bind="domContainer"></div>

View File

@ -0,0 +1,19 @@
<!--
Option panel for the JSON parser UI. Elements carrying a bind="..." attribute
are looked up by name from Refine.JsonParserUI (pickRecordElementsButton,
previewButton, limitCheckbox, limitInput, includeFileSourcesCheckbox).
-->
<div class="grid-layout layout-loose layout-full"><table>
<tr>
<td colspan="2"><div class="grid-layout layout-tighter layout-full"><table>
<tr>
<td style="text-align: right;">&nbsp;</td>
<td width="1%"><button class="button" bind="pickRecordElementsButton">Pick Record Nodes</button></td>
<td width="1%"><button class="button" bind="previewButton">Update Preview</button></td>
</tr>
</table></div></td>
</tr>
<tr>
<td><div class="grid-layout layout-tightest"><table>
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> record(s) of data</td></tr>
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
<td colspan="2">Store file source (file names, URLs) in each row</td></tr>
</table></div></td>
</tr>
</table></div>

View File

@ -0,0 +1,241 @@
/*
Copyright 2011, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * Parser UI for JSON imports: keeps the importing context it is given, builds
 * the option panel and then shows the record-node picker (no preview is
 * requested until a record path has been chosen).
 *
 * @param controller importing controller used to push options and fetch preview data
 * @param jobID identifier of the importing job
 * @param job the importing job object
 * @param format the format this UI was created for
 * @param config initial parser configuration (dom, recordPath, limit, flags)
 * @param dataContainerElmt jQuery element that receives the picker or the preview table
 * @param progressContainerElmt jQuery element shown while a preview is loading
 * @param optionContainerElmt jQuery element that receives the option panel
 */
Refine.JsonParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {

  // Importing context.
  this._controller = controller;
  this._jobID = jobID;
  this._job = job;
  this._format = format;
  this._config = config;

  // Page regions this UI draws into.
  this._optionContainer = optionContainerElmt;
  this._progressContainer = progressContainerElmt;
  this._dataContainer = dataContainerElmt;

  // Handle of the pending delayed preview refresh, if any.
  this._timerID = null;

  this._initialize();
  this._showPickRecordNodesUI();
};

// Make this UI discoverable by name from the default importing controller.
Refine.DefaultImportingController.parserUIs["JsonParserUI"] = Refine.JsonParserUI;
/**
 * Cancels the pending delayed preview refresh, if one is scheduled.
 */
Refine.JsonParserUI.prototype.dispose = function() {
  var pendingTimer = this._timerID;
  if (pendingTimer == null) {
    return;
  }
  window.clearTimeout(pendingTimer);
  this._timerID = null;
};
/**
 * Reports whether the project can be created with the current options. A
 * non-empty record path must have been chosen; otherwise the user is alerted.
 *
 * @return {boolean} true when a record path is set, false otherwise (the
 *   original fell through and returned undefined in that case)
 */
Refine.JsonParserUI.prototype.confirmReadyToCreateProject = function() {
  if ((this._config.recordPath) && this._config.recordPath.length > 0) {
    return true;
  }
  window.alert('Please specify a record path first.');
  return false;
};
/**
 * Collects the parser options currently selected in the option panel.
 *
 * @return {Object} options with recordPath (taken from the configuration),
 *   limit (-1 when the limit checkbox is off or the text is not a number) and
 *   includeFileSources.
 */
Refine.JsonParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;
  var options = {
    recordPath: this._config.recordPath
  };

  // Defined locally, as in the sibling parser UIs: nothing in this file
  // provides a shared parseIntDefault, so the original call failed with a
  // ReferenceError once the limit checkbox was ticked. The explicit radix
  // keeps leading zeros from being read as octal by older engines.
  var parseIntDefault = function(s, def) {
    var n = parseInt(s, 10);
    return isNaN(n) ? def : n;
  };

  if (elmts.limitCheckbox[0].checked) {
    options.limit = parseIntDefault(elmts.limitInput[0].value, -1);
  } else {
    options.limit = -1;
  }
  options.includeFileSources = elmts.includeFileSourcesCheckbox[0].checked;

  return options;
};
/**
 * Builds the option panel: loads its HTML, pre-fills the controls from the
 * configuration, and wires the two buttons plus change events on all inputs
 * and selects (the latter trigger a delayed preview refresh).
 */
Refine.JsonParserUI.prototype._initialize = function() {
  var ui = this;
  var config = this._config;

  this._optionContainer.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/parser-interfaces/json-parser-ui.html"));
  this._optionContainerElmts = DOM.bind(this._optionContainer);
  var elmts = this._optionContainerElmts;

  elmts.previewButton.click(function() { ui._updatePreview(); });

  // Only a positive limit is reflected in the controls.
  if (config.limit > 0) {
    elmts.limitCheckbox.attr("checked", "checked");
    elmts.limitInput[0].value = config.limit.toString();
  }
  if (config.includeFileSources) {
    elmts.includeFileSourcesCheckbox.attr("checked", "checked");
  }

  elmts.pickRecordElementsButton.click(function() {
    ui._showPickRecordNodesUI();
  });

  var scheduleRefresh = function() {
    ui._scheduleUpdatePreview();
  };
  this._optionContainer.find("input").bind("change", scheduleRefresh);
  this._optionContainer.find("select").bind("change", scheduleRefresh);
};
/**
 * Replaces the data area with a browsable rendering of the sample JSON
 * (this._config.dom). Hovering highlights the innermost node under the mouse;
 * clicking an object or a literal stores its path as the record path and
 * refreshes the preview.
 *
 * Paths are arrays of field names; array elements and the root contribute
 * the placeholder '__anonymous__'.
 *
 * The unused HTML-escaping helpers of the original (escapeElmt, escapeHtml,
 * textAsHtml) have been removed; nothing referenced them.
 */
Refine.JsonParserUI.prototype._showPickRecordNodesUI = function() {
  var self = this;

  this._dataContainer.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/parser-interfaces/json-parser-select-ui.html"));
  var elmts = DOM.bind(this._dataContainer);

  // True when elmt is the innermost '.node' element containing the event
  // target, so nested nodes do not also react to the same event.
  var hittest = function(evt, elmt) {
    var a = $(evt.target).closest('.node');
    return a.length > 0 && a[0] == elmt[0];
  };

  // Hover highlighting plus click-to-select for one rendered node.
  var registerEvents = function(elmt, path) {
    elmt.bind('mouseover', function(evt) {
      if (hittest(evt, elmt)) {
        elmts.domContainer.find('.highlight').removeClass('highlight');
        elmt.addClass('highlight');
      }
    })
    .bind('mouseout', function(evt) {
      elmt.removeClass('highlight');
    })
    .click(function(evt) {
      if (hittest(evt, elmt)) {
        self._setRecordPath(path);
      }
    });
  };

  // Arrays themselves are not selectable; every element shares the same path
  // (parent path plus '__anonymous__').
  var renderArray = function(a, container, parentPath) {
    $('<span>').addClass('punctuation').text('[').appendTo(container);

    var parentPath2 = [].concat(parentPath);
    parentPath2.push('__anonymous__');

    var elementNode = null;
    for (var i = 0; i < a.length; i++) {
      if (elementNode != null) {
        // Comma goes at the end of the previous element's node.
        $('<span>').addClass('punctuation').text(',').appendTo(elementNode);
      }
      elementNode = $('<div>').addClass('node').addClass('indented').appendTo(container);

      renderNode(a[i], elementNode, parentPath2);
    }

    $('<span>').addClass('punctuation').text(']').appendTo(container);
  };

  // Objects are selectable; each own field extends the path with its key.
  var renderObject = function(o, container, parentPath) {
    $('<span>').addClass('punctuation').text('{').appendTo(container);

    var elementNode = null;
    for (var key in o) {
      if (o.hasOwnProperty(key)) {
        if (elementNode != null) {
          // Comma goes at the end of the previous field's node.
          $('<span>').addClass('punctuation').text(',').appendTo(elementNode);
        }
        elementNode = $('<div>').addClass('node').addClass('indented').appendTo(container);

        $('<span>').text(key).addClass('field-name').appendTo(elementNode);
        $('<span>').text(': ').addClass('punctuation').appendTo(elementNode);

        var parentPath2 = [].concat(parentPath);
        parentPath2.push(key);

        renderNode(o[key], elementNode, parentPath2);
      }
    }

    $('<span>').addClass('punctuation').text('}').appendTo(container);

    registerEvents(container, parentPath);
  };

  // Dispatches on the value's kind; null renders as a non-selectable literal.
  var renderNode = function(node, container, parentPath) {
    if (node == null) {
      $('<span>').addClass('literal').text('null').appendTo(container);
    } else {
      if ($.isPlainObject(node)) {
        renderObject(node, container, parentPath);
      } else if ($.isArray(node)) {
        renderArray(node, container, parentPath);
      } else {
        $('<span>').addClass('literal').text(node.toString()).appendTo(container);
        registerEvents(container, parentPath);
      }
    }
  };

  renderNode(this._config.dom, elmts.domContainer, [ '__anonymous__' ]);
};
/**
 * (Re)starts the half-second countdown after which the preview is refreshed.
 * Any countdown already running is cancelled first, so a burst of changes
 * leads to a single refresh.
 */
Refine.JsonParserUI.prototype._scheduleUpdatePreview = function() {
  var ui = this;

  if (ui._timerID != null) {
    window.clearTimeout(ui._timerID);
    ui._timerID = null;
  }

  var fire = function() {
    ui._timerID = null;
    ui._updatePreview();
  };
  ui._timerID = window.setTimeout(fire, 500); // 0.5 second
};
/**
 * Stores the chosen record path in the configuration and refreshes the
 * preview right away.
 *
 * @param path array of path segments identifying the record node
 */
Refine.JsonParserUI.prototype._setRecordPath = function(path) {
  var config = this._config;
  config.recordPath = path;

  this._updatePreview();
};
/**
 * Sends the current options to the controller and, when it answers with
 * status "ok", fetches preview data (passing 100 as the second argument of
 * getPreviewData) and renders it into the data container. The progress
 * indicator is shown up front and hidden once the preview data has arrived.
 */
Refine.JsonParserUI.prototype._updatePreview = function() {
  var ui = this;

  var showPreview = function(projectData) {
    ui._progressContainer.hide();
    new Refine.PreviewTable(projectData, ui._dataContainer.unbind().empty());
  };

  var onOptionsUpdated = function(result) {
    if (result.status != "ok") {
      return;
    }
    ui._controller.getPreviewData(showPreview, 100);
  };

  ui._progressContainer.show();

  var previewOptions = ui.getOptions();
  // Value-type guessing is always switched off for the preview request.
  previewOptions.guessCellValueTypes = false;

  ui._controller.updateFormatAndOptions(previewOptions, onOptionsUpdated);
};

View File

@ -0,0 +1,45 @@
<div class="grid-layout layout-loose layout-full"><table>
<!--
  Parser options form laid out as nested tables.
  NOTE(review): elements carrying a "bind" attribute are presumably looked up
  by name from the accompanying parser UI script; confirm before renaming any.
-->
<!-- Row 1: character encoding selector and the "Update Preview" button. -->
<tr>
<td><div class="grid-layout layout-tighter"><table>
<tr>
<td width="1%">Character&nbsp;encoding</td>
<td><select bind="encodingSelect"></select></td>
</tr>
</table></div></td>
<td colspan="2"><div class="grid-layout layout-tighter layout-full"><table>
<tr>
<td style="text-align: right;">&nbsp;</td>
<td width="1%"><button class="button" bind="previewButton">Update&nbsp;Preview</button></td>
</tr>
</table></div></td>
</tr>
<!-- Row 2: line/row grouping, blank-value handling, and ignore/skip/limit counts. -->
<tr>
<!-- Lines-per-row count and the row separator choice (new line vs. custom). -->
<td><div class="grid-layout layout-tightest"><table>
<tr><td colspan="2">Parse every <input bind="linesPerRowInput" type="text" class="lightweight" size="2" value="0" />
lines into one row
</td></tr>
<tr><td colspan="2">Lines are separated by</td></tr>
<tr><td width="1%"><input type="radio" name="row-separator" value="new-line" /></td><td>new line characters \n</td></tr>
<tr><td width="1%"><input type="radio" name="row-separator" value="custom" /></td><td>custom
<input bind="rowSeparatorInput" type="text" class="lightweight" size="5" /></td></tr>
<tr><td colspan="2">Escape special characters with \</td></tr>
</table></div></td>
<!-- Checkboxes for blank rows, blank cells, and per-row file source. -->
<td><div class="grid-layout layout-tightest"><table>
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" /></td>
<td colspan="2">Store blank rows</td></tr>
<tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" /></td>
<td colspan="2">Store blank cells as nulls</td></tr>
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
<td>Store file source<br/>(file names, URLs)<br/>in each row</td></tr>
</table></div></td>
<!-- Each option pairs an enabling checkbox with a numeric text input. -->
<td colspan="2"><div class="grid-layout layout-tightest"><table>
<tr><td width="1%"><input type="checkbox" bind="ignoreCheckbox" /></td><td>Ignore first</td>
<td><input bind="ignoreInput" type="text" class="lightweight" size="2" value="0" /> line(s) at beginning of file</td></tr>
<tr><td width="1%"><input type="checkbox" bind="skipCheckbox" /></td><td>Discard initial</td>
<td><input bind="skipInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
</table></div></td>
</tr>
</table></div>

Some files were not shown because too many files have changed in this diff Show More