RandomSec/main/src/com/google/refine/exporters/CustomizableTabularExporterUtilities.java

395 lines
16 KiB
Java
Raw Normal View History

/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.exporters;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
2018-05-04 14:15:02 +02:00
import java.time.OffsetDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TimeZone;
data package metadata (#1398) * fix the appbundle issue #1209 * fix #1162 allow the JRE 9 * fix the package declarations * remove the _ from the method name * use the explicit scoping * remote extra ; * fix issued from codacy * fix issued from codacy * add preferences link to the index page * handle the empty user metadata * fix 'last modified' sorting issue #1307 * prevent overflow of the table. issue #1306 * add isoDateParser to sort the date * prevent overflow of the project index * remove sorter arrow for action columns * disable editing the internal metadata * adjust the width of the table * change MetaData to Metadata * change the filed name from rowNumber to rowCount * put back the incidently deleted gitignore * add double quote to prevent word splitting * UI improvement on metadata view and project list view * remove the date field in metadata * message notification of the free RAM. Issue #1295 * UI tuning for metadata view * shorten the ISO date to locale date format * Added translation using Weblate (Portuguese (Brazil)) * remove the rename link * Ignore empty language files introduced by Weblate * Add UI for Invert text filter * Backend support for Inverting Text search facets * Fix reset on text search facet * More succinct return statements * add tests for SetProjectMetadataCommand * Tidying up for Codacy * Added Tests for TextSearchFilter * Corrections for Codacy * More code tidy up * let the browser auto fit the table cell when resizing/zooming * fix import multiple excel with mulitple sheets issue #1328 * check if the project has the userMetadata * fix the unit test support multi files with multi tables for open office * prevent the same key for user metadata * replace _ with variable for exception * fix the no-undef issue * to adjust the width of transform dialog. 
issue #1332 * fix the row count refresh issue * extract method * move the log message * cosmatic changes for codacy * fix typo * bump to version 2.8 * .gitignore is now working * preview stage won't have the metadata populated, so protect NPE * Update README.md No more direct link to the last version tag, which will avoid having to think of updating the readme * refacotring the ProjectMetadata class * introduce the IMetadata interface * create submodule of dataschema * add back * setup lib for dataschema; upgrade the apache lang to lang3 * replace escape* functions from apache lang3 * replace the ProjectMetadata with IMetadata interface * add missing jars * set the IMetadata a field of Project * remove PreferenceStore out of Project model * fix test SetProjectMetadataCommandTests by casting * introdcue the AbstractMetadata * introdcue the AbstractMetadata * reorganize the metadata package * allow have mulitiple metadata for a project * support for mulitple metadata format * remove jdk7 since 'table schema' java implmentation only support jdk8+ * set execute permission for script * fix the Unit Test after Metadata refactoring * restore the apache lang2.5 since jetty 6.1.22 depend on it * add commons lang 2.5 jar * git submodule add https://github.com/frictionlessdata/datapackage-java * remove the metadata parameter from the ProjectManager.registerProject method * remove hashmap _projectsMetadata field from the ProjectManager and FileProjectManager * init the Project.metadataMap * fix Unit Test * restore the ProjectMetaData map to ProjectManager * put the ProjectMetaDta in place for ProjectManager and Project object * check null of singleton instead of create a constructor just for test * load the data package metadata * importing data package * importing data package * encapsulate the Package class into DataPackageMetadata * user _ to indicate the class fields * introduce base URL in order to download the data files * import data package UI and draft backend * 
import data package UI * fix typo * download the data set pointed from metadata resource * save and load the data package metadata * avoid magic string * package cleanup * set the java_version to 1.8 * set the min jdk to 1.8 * add the 3rd party src in the build.xml * skip the file selection page if only 1 DATA file * add files structure for json editor * seperate out the metadata file from the retrival file list * rename the OKF_METADATA to DATAPACKAGE_METADATA * clean up * implement GetMetadateCommand class * display the metadata in json format * git submodule update --remote --merge * adjust the setting after pulling from datapackage origin * fix the failed UT DateExtensionTests.testFetchCounts due to new json jar json-20160810.jar will complain: JSONObject["float"] not a string. * clean up the weird loop array syntax get complained * remove the unused constant * export in data package format * interface cleanup * fix UT * edit the metadata * add UT for SetMetadataCommand * fix UT for SetMetadataCommand * display the data package metadata link on the project index page * update submodule * log the exceptions * Ajv does not work properly, use the back end validation instead * enable the validation for jsoneditor * first draft of the data validation * create a map to hold the constraint and its handler * rename * support for minLength and maxLength from spec * add validate command * test the opeation instead of validate command * rename the UT * format the error message and push to the report * fix row number * add resource bundle for validator * inject the code of the constrains * make the StrSubstitutor works * extract the type and format information * add the customizedFormat to interface to allow format properly * get rid of magic string * take care of missing parts of the data package * implement RequiredConstraint * patch for number type * add max/min constraints * get the constrains directly from field * implement the PatternConstraint * suppress warning * 
fix the broken UT when expecting 2 digits fraction * handle the cast and type properly * fix the missing resource files for data package when run from command line * use the copy instead of copydir * add script for appveyor * update script for appveyor * do recursive clone * correct the git url * fix clone path * clone folder option does not work * will put another PR for this. delete for now * revert the interface method name * lazy loading the project data * disable the validate menu for now * add UT * assert UTs * add UT * fix #1386 * remove import * test the thread * Revert "test the thread" This reverts commit 779214160055afe3ccdcc18c57b0c7c72e87c824. * fix the URLCachingTest UT * define the template data package * tidy up the metadata interface * check the http response code * fix the package * display user friendly message when URL path is not reachable * populate the data package schema * Delete hs_err_pid15194.log * populate data package info * add username preference and it will be pulled as the creator of the metadata * undo the project.updateColumnChange() and start to introduce the fields into the existing core model * tightly integrate the data package metadata * tightly integrate the data package metadata for project level * remove the submodule * move the edit botton * clean up build * load the new property * load the project metadata * fix issues from codacy * remove unused fields and annotation * check the http response code firstly * import zipped data package * allow without keywords * process the zip data package from url * merge the tags * check store firstly * remove the table schema src * move the json schema files to schema dir * add comment * add comment * remove git moduels * add incidently deleted file * fix typo * remove SetMetadataCommand * revert change * merge from master
2018-02-02 14:24:19 +01:00
import org.apache.commons.lang3.StringUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import com.google.refine.ProjectManager;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.FilteredRows;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.exporters.TabularSerializer.CellData;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.Row;
import com.google.refine.preference.PreferenceStore;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
abstract public class CustomizableTabularExporterUtilities {
/**
 * Sends the rows selected by {@code engine} to {@code serializer}, one
 * {@link CellData} list per row, optionally preceded by a header row.
 * <p>
 * Export options are read as a JSON object from the {@code "options"} entry
 * of {@code params}. Recognised keys:
 * <ul>
 * <li>{@code outputColumnHeaders} (boolean, default true): emit a header row.</li>
 * <li>{@code outputBlankRows} (boolean): emit rows whose cells all format to
 *     null. Defaults to true when an options object is present and to false
 *     when there are no options at all.</li>
 * <li>{@code limit} (int, default -1): stop after this many emitted rows;
 *     values &lt;= 0 mean no limit.</li>
 * <li>{@code columns} (array of objects with a {@code name}): the columns to
 *     export, in order, each with its own formatting settings. When absent,
 *     every project column is exported with default formatting.</li>
 * </ul>
 * Options that fail to parse as JSON are treated as if none were given.
 * A requested column name that does not resolve to a project column yields
 * an empty cell in every row instead of aborting the export.
 *
 * @param project    the project whose rows are exported
 * @param engine     supplies the filtered row set to visit
 * @param params     request parameters; may be null
 * @param serializer receives startFile / addRow / endFile callbacks
 */
static public void exportRows(
        final Project project,
        final Engine engine,
        Properties params,
        final TabularSerializer serializer) {

    String optionsString = (params != null) ? params.getProperty("options") : null;
    JSONObject optionsTemp = null;
    if (optionsString != null) {
        try {
            optionsTemp = ParsingUtilities.evaluateJsonStringToObject(optionsString);
        } catch (JSONException ignored) {
            // Ignore and keep options null: fall back to the default export settings.
        }
    }
    final JSONObject options = optionsTemp;

    final boolean outputColumnHeaders = options == null ? true :
        JSONUtilities.getBoolean(options, "outputColumnHeaders", true);
    final boolean outputEmptyRows = options == null ? false :
        JSONUtilities.getBoolean(options, "outputBlankRows", true);
    final int limit = options == null ? -1 :
        JSONUtilities.getInt(options, "limit", -1);

    final List<String> columnNames;
    final Map<String, CellFormatter> columnNameToFormatter = new HashMap<>();

    JSONArray columnOptionArray = options == null ? null :
        JSONUtilities.getArray(options, "columns");
    if (columnOptionArray == null) {
        // No explicit selection: export every column with default formatting.
        List<Column> columns = project.columnModel.columns;
        columnNames = new ArrayList<>(columns.size());
        for (Column column : columns) {
            String name = column.getName();
            columnNames.add(name);
            columnNameToFormatter.put(name, new CellFormatter());
        }
    } else {
        int count = columnOptionArray.length();
        columnNames = new ArrayList<>(count);
        for (int i = 0; i < count; i++) {
            JSONObject columnOptions = JSONUtilities.getObjectElement(columnOptionArray, i);
            if (columnOptions != null) {
                String name = JSONUtilities.getString(columnOptions, "name", null);
                if (name != null) {
                    columnNames.add(name);
                    columnNameToFormatter.put(name, new CellFormatter(columnOptions));
                }
            }
        }
    }

    RowVisitor visitor = new RowVisitor() {
        // Number of data rows handed to the serializer so far (header excluded).
        int rowCount = 0;

        @Override
        public void start(Project project) {
            serializer.startFile(options);
            if (outputColumnHeaders) {
                List<CellData> cells = new ArrayList<>(columnNames.size());
                for (String name : columnNames) {
                    cells.add(new CellData(name, name, name, null));
                }
                serializer.addRow(cells, true);
            }
        }

        @Override
        public boolean visit(Project project, int rowIndex, Row row) {
            List<CellData> cells = new ArrayList<>(columnNames.size());
            int nonNullCount = 0;

            for (String columnName : columnNames) {
                Column column = project.columnModel.getColumnByName(columnName);
                CellData cellData = null;
                // A name taken from the options may not match any project column;
                // keep the slot (so rows stay aligned with the header) but leave it blank.
                if (column != null) {
                    CellFormatter formatter = columnNameToFormatter.get(columnName);
                    cellData = formatter.format(
                        project,
                        column,
                        row.getCell(column.getCellIndex()));
                }
                cells.add(cellData);
                if (cellData != null) {
                    nonNullCount++;
                }
            }

            if (nonNullCount > 0 || outputEmptyRows) {
                serializer.addRow(cells, false);
                rowCount++;
            }

            // Returning true asks the row traversal to stop once the limit is reached.
            return limit > 0 && rowCount >= limit;
        }

        @Override
        public void end(Project project) {
            serializer.endFile();
        }
    };

    FilteredRows filteredRows = engine.getAllFilteredRows();
    filteredRows.accept(project, visitor);
}
/**
 * Performs a dry-run export with a counting serializer to find out how large
 * the exported table would be for the given parameters.
 *
 * @param project the project to export
 * @param engine  supplies the filtered row set
 * @param params  export parameters, passed unchanged to {@code exportRows}
 * @return a two-element array: index 0 is the widest row's cell count,
 *         index 1 is the number of rows passed to the serializer
 *         (a header row, when emitted, is counted as well)
 */
static public int[] countColumnsRows(
        final Project project,
        final Engine engine,
        Properties params) {

    RowCountingTabularSerializer counter = new RowCountingTabularSerializer();
    exportRows(project, engine, params, counter);

    int[] dimensions = new int[2];
    dimensions[0] = counter.columns;
    dimensions[1] = counter.rows;
    return dimensions;
}
/**
 * Serializer that writes nothing; it only records how many rows were added
 * and the size of the widest row.
 */
static private class RowCountingTabularSerializer implements TabularSerializer {
    /** Largest cell count seen in any row so far. */
    int columns;
    /** Number of addRow calls so far, header rows included. */
    int rows;

    @Override
    public void startFile(JSONObject options) {
        // Nothing to open: this serializer only counts.
    }

    @Override
    public void endFile() {
        // Nothing to close.
    }

    @Override
    public void addRow(List<CellData> cells, boolean isHeader) {
        int width = cells.size();
        if (width > columns) {
            columns = width;
        }
        rows += 1;
    }
}
/**
 * What text to export for a cell whose reconciliation judgment is Matched.
 * Selected per column through the "output" key of the "reconSettings" option
 * ("entity-name", "entity-id" or "cell-content").
 */
private enum ReconOutputMode {
// Export the matched entity's name.
ENTITY_NAME,
// Export the matched entity's id.
ENTITY_ID,
// Export the cell's own value, as for an unreconciled cell.
CELL_CONTENT
}
/**
 * Which kind of DateFormat a CellFormatter constructs for a column.
 * Selected through the "format" key of the "dateSettings" option
 * ("iso-8601", "locale-short", "locale-medium", "locale-long",
 * "locale-full" or "custom").
 */
private enum DateFormatMode {
// Fixed ISO 8601 pattern (date only when "omitTime" is set).
ISO_8601,
// Locale-dependent formats built with the SHORT / MEDIUM / LONG / FULL styles.
SHORT_LOCALE,
MEDIUM_LOCALE,
LONG_LOCALE,
FULL_LOCALE,
// Pattern supplied in the "custom" key; replaced by ISO_8601 when that pattern is missing or empty.
CUSTOM
}
// SimpleDateFormat pattern for a full date-time stamp. The trailing 'Z' is a
// quoted literal, not a time-zone field, so the output is only a correct ISO 8601
// value when the formatter using this pattern is set to UTC.
final static private String fullIso8601 = "yyyy-MM-dd'T'HH:mm:ss'Z'";
static private class CellFormatter {
ReconOutputMode recon_outputMode = ReconOutputMode.ENTITY_NAME;
boolean recon_blankUnmatchedCells = false;
boolean recon_linkToEntityPages = true;
DateFormatMode date_formatMode = DateFormatMode.ISO_8601;
String date_custom = null;
boolean date_useLocalTimeZone = false;
boolean date_omitTime = false;
DateFormat dateFormatter;
Map<String, String> identifierSpaceToUrl = null;
2018-04-27 03:50:46 +02:00
//SQLExporter parameter to convert null cell value to empty string
boolean includeNullFieldValue = false;
CellFormatter() {
dateFormatter = new SimpleDateFormat(fullIso8601);
}
CellFormatter(JSONObject options) {
JSONObject reconSettings = JSONUtilities.getObject(options, "reconSettings");
2018-04-27 03:50:46 +02:00
includeNullFieldValue = JSONUtilities.getBoolean(options, "nullValueToEmptyStr", false);
if (reconSettings != null) {
String reconOutputString = JSONUtilities.getString(reconSettings, "output", null);
if ("entity-name".equals(reconOutputString)) {
recon_outputMode = ReconOutputMode.ENTITY_NAME;
} else if ("entity-id".equals(reconOutputString)) {
recon_outputMode = ReconOutputMode.ENTITY_ID;
} else if ("cell-content".equals(reconOutputString)) {
recon_outputMode = ReconOutputMode.CELL_CONTENT;
}
recon_blankUnmatchedCells = JSONUtilities.getBoolean(reconSettings, "blankUnmatchedCells", recon_blankUnmatchedCells);
recon_linkToEntityPages = JSONUtilities.getBoolean(reconSettings, "linkToEntityPages", recon_linkToEntityPages);
}
JSONObject dateSettings = JSONUtilities.getObject(options, "dateSettings");
if (dateSettings != null) {
String dateFormatString = JSONUtilities.getString(dateSettings, "format", null);
if ("iso-8601".equals(dateFormatString)) {
date_formatMode = DateFormatMode.ISO_8601;
} else if ("locale-short".equals(dateFormatString)) {
date_formatMode = DateFormatMode.SHORT_LOCALE;
} else if ("locale-medium".equals(dateFormatString)) {
date_formatMode = DateFormatMode.MEDIUM_LOCALE;
} else if ("locale-long".equals(dateFormatString)) {
date_formatMode = DateFormatMode.LONG_LOCALE;
} else if ("locale-full".equals(dateFormatString)) {
date_formatMode = DateFormatMode.FULL_LOCALE;
} else if ("custom".equals(dateFormatString)) {
date_formatMode = DateFormatMode.CUSTOM;
}
date_custom = JSONUtilities.getString(dateSettings, "custom", null);
date_useLocalTimeZone = JSONUtilities.getBoolean(dateSettings, "useLocalTimeZone", date_useLocalTimeZone);
date_omitTime = JSONUtilities.getBoolean(dateSettings, "omitTime", date_omitTime);
if (date_formatMode == DateFormatMode.CUSTOM &&
(date_custom == null || date_custom.isEmpty())) {
date_formatMode = DateFormatMode.ISO_8601;
}
}
switch (date_formatMode) {
case SHORT_LOCALE:
dateFormatter = date_omitTime ?
SimpleDateFormat.getDateInstance(SimpleDateFormat.SHORT) :
SimpleDateFormat.getDateTimeInstance(SimpleDateFormat.SHORT, SimpleDateFormat.SHORT);
break;
case MEDIUM_LOCALE:
dateFormatter = date_omitTime ?
SimpleDateFormat.getDateInstance(SimpleDateFormat.MEDIUM) :
SimpleDateFormat.getDateTimeInstance(SimpleDateFormat.MEDIUM, SimpleDateFormat.MEDIUM);
break;
case LONG_LOCALE:
dateFormatter = date_omitTime ?
SimpleDateFormat.getDateInstance(SimpleDateFormat.LONG) :
SimpleDateFormat.getDateTimeInstance(SimpleDateFormat.LONG, SimpleDateFormat.LONG);
break;
case FULL_LOCALE:
dateFormatter = date_omitTime ?
SimpleDateFormat.getDateInstance(SimpleDateFormat.FULL) :
SimpleDateFormat.getDateTimeInstance(SimpleDateFormat.FULL, SimpleDateFormat.FULL);
break;
case CUSTOM:
dateFormatter = new SimpleDateFormat(date_custom);
break;
default:
dateFormatter = date_omitTime ?
new SimpleDateFormat("yyyy-MM-dd") :
new SimpleDateFormat(fullIso8601);
}
if (!date_useLocalTimeZone) {
dateFormatter.setTimeZone(TimeZone.getTimeZone("UTC"));
}
}
CellData format(Project project, Column column, Cell cell) {
if (cell != null) {
String link = null;
String text = null;
if (cell.recon != null) {
Recon recon = cell.recon;
if (recon.judgment == Recon.Judgment.Matched) {
if (recon_outputMode == ReconOutputMode.ENTITY_NAME) {
text = recon.match.name;
} else if (recon_outputMode == ReconOutputMode.ENTITY_ID) {
text = recon.match.id;
} // else: output cell content
if (recon_linkToEntityPages) {
buildIdentifierSpaceToUrlMap();
String service = recon.service;
String viewUrl = identifierSpaceToUrl.get(service);
if (viewUrl != null) {
link = StringUtils.replace(viewUrl, "{{id}}", recon.match.id);
}
}
} else if (recon_blankUnmatchedCells) {
return null;
}
}
Object value = cell.value;
if (value != null) {
if (text == null) {
if (value instanceof String) {
text = (String) value;
2018-05-04 14:15:02 +02:00
} else if (value instanceof OffsetDateTime) {
text = ((OffsetDateTime) value).format(DateTimeFormatter.ISO_INSTANT);
} else {
text = value.toString();
}
}
return new CellData(column.getName(), value, text, link);
}
2018-04-27 03:50:46 +02:00
}else {//added for sql exporter
if(includeNullFieldValue) {
return new CellData(column.getName(), "", "", "");
}
}
return null;
}
void buildIdentifierSpaceToUrlMap() {
if (identifierSpaceToUrl != null) {
return;
}
identifierSpaceToUrl = new HashMap<String, String>();
PreferenceStore ps = ProjectManager.singleton.getPreferenceStore();
JSONArray services = (JSONArray) ps.get("reconciliation.standardServices");
if (services != null) {
int count = services.length();
for (int i = 0; i < count; i++) {
JSONObject service = JSONUtilities.getObjectElement(services, i);
JSONObject view = JSONUtilities.getObject(service, "view");
if (view != null) {
String url = JSONUtilities.getString(service, "url", null);
String viewUrl = JSONUtilities.getString(view, "url", null);
if (url != null && viewUrl != null) {
identifierSpaceToUrl.put(url, viewUrl);
}
}
}
}
}
}
2018-04-27 03:50:46 +02:00
}