c4b0ff6bea
* fix the appbundle issue #1209 * fix #1162 allow the JRE 9 * fix the package declarations * remove the _ from the method name * use the explicit scoping * remote extra ; * fix issued from codacy * fix issued from codacy * add preferences link to the index page * handle the empty user metadata * fix 'last modified' sorting issue #1307 * prevent overflow of the table. issue #1306 * add isoDateParser to sort the date * prevent overflow of the project index * remove sorter arrow for action columns * disable editing the internal metadata * adjust the width of the table * change MetaData to Metadata * change the filed name from rowNumber to rowCount * put back the incidently deleted gitignore * add double quote to prevent word splitting * UI improvement on metadata view and project list view * remove the date field in metadata * message notification of the free RAM. Issue #1295 * UI tuning for metadata view * shorten the ISO date to locale date format * Added translation using Weblate (Portuguese (Brazil)) * remove the rename link * Ignore empty language files introduced by Weblate * Add UI for Invert text filter * Backend support for Inverting Text search facets * Fix reset on text search facet * More succinct return statements * add tests for SetProjectMetadataCommand * Tidying up for Codacy * Added Tests for TextSearchFilter * Corrections for Codacy * More code tidy up * let the browser auto fit the table cell when resizing/zooming * fix import multiple excel with mulitple sheets issue #1328 * check if the project has the userMetadata * fix the unit test support multi files with multi tables for open office * prevent the same key for user metadata * replace _ with variable for exception * fix the no-undef issue * to adjust the width of transform dialog. 
issue #1332 * fix the row count refresh issue * extract method * move the log message * cosmatic changes for codacy * fix typo * bump to version 2.8 * .gitignore is now working * preview stage won't have the metadata populated, so protect NPE * Update README.md No more direct link to the last version tag, which will avoid having to think of updating the readme * refacotring the ProjectMetadata class * introduce the IMetadata interface * create submodule of dataschema * add back * setup lib for dataschema; upgrade the apache lang to lang3 * replace escape* functions from apache lang3 * replace the ProjectMetadata with IMetadata interface * add missing jars * set the IMetadata a field of Project * remove PreferenceStore out of Project model * fix test SetProjectMetadataCommandTests by casting * introdcue the AbstractMetadata * introdcue the AbstractMetadata * reorganize the metadata package * allow have mulitiple metadata for a project * support for mulitple metadata format * remove jdk7 since 'table schema' java implmentation only support jdk8+ * set execute permission for script * fix the Unit Test after Metadata refactoring * restore the apache lang2.5 since jetty 6.1.22 depend on it * add commons lang 2.5 jar * git submodule add https://github.com/frictionlessdata/datapackage-java * remove the metadata parameter from the ProjectManager.registerProject method * remove hashmap _projectsMetadata field from the ProjectManager and FileProjectManager * init the Project.metadataMap * fix Unit Test * restore the ProjectMetaData map to ProjectManager * put the ProjectMetaDta in place for ProjectManager and Project object * check null of singleton instead of create a constructor just for test * load the data package metadata * importing data package * importing data package * encapsulate the Package class into DataPackageMetadata * user _ to indicate the class fields * introduce base URL in order to download the data files * import data package UI and draft backend * 
import data package UI * fix typo * download the data set pointed from metadata resource * save and load the data package metadata * avoid magic string * package cleanup * set the java_version to 1.8 * set the min jdk to 1.8 * add the 3rd party src in the build.xml * skip the file selection page if only 1 DATA file * add files structure for json editor * seperate out the metadata file from the retrival file list * rename the OKF_METADATA to DATAPACKAGE_METADATA * clean up * implement GetMetadateCommand class * display the metadata in json format * git submodule update --remote --merge * adjust the setting after pulling from datapackage origin * fix the failed UT DateExtensionTests.testFetchCounts due to new json jar json-20160810.jar will complain: JSONObject["float"] not a string. * clean up the weird loop array syntax get complained * remove the unused constant * export in data package format * interface cleanup * fix UT * edit the metadata * add UT for SetMetadataCommand * fix UT for SetMetadataCommand * display the data package metadata link on the project index page * update submodule * log the exceptions * Ajv does not work properly, use the back end validation instead * enable the validation for jsoneditor * first draft of the data validation * create a map to hold the constraint and its handler * rename * support for minLength and maxLength from spec * add validate command * test the opeation instead of validate command * rename the UT * format the error message and push to the report * fix row number * add resource bundle for validator * inject the code of the constrains * make the StrSubstitutor works * extract the type and format information * add the customizedFormat to interface to allow format properly * get rid of magic string * take care of missing parts of the data package * implement RequiredConstraint * patch for number type * add max/min constraints * get the constrains directly from field * implement the PatternConstraint * suppress warning * 
fix the broken UT when expecting 2 digits fraction * handle the cast and type properly * fix the missing resource files for data package when run from command line * use the copy instead of copydir * add script for appveyor * update script for appveyor * do recursive clone * correct the git url * fix clone path * clone folder option does not work * will put another PR for this. delete for now * revert the interface method name * lazy loading the project data * disable the validate menu for now * add UT * assert UTs * add UT * fix #1386 * remove import * test the thread * Revert "test the thread" This reverts commit 779214160055afe3ccdcc18c57b0c7c72e87c824. * fix the URLCachingTest UT * define the template data package * tidy up the metadata interface * check the http response code * fix the package * display user friendly message when URL path is not reachable * populate the data package schema * Delete hs_err_pid15194.log * populate data package info * add username preference and it will be pulled as the creator of the metadata * undo the project.updateColumnChange() and start to introduce the fields into the existing core model * tightly integrate the data package metadata * tightly integrate the data package metadata for project level * remove the submodule * move the edit botton * clean up build * load the new property * load the project metadata * fix issues from codacy * remove unused fields and annotation * check the http response code firstly * import zipped data package * allow without keywords * process the zip data package from url * merge the tags * check store firstly * remove the table schema src * move the json schema files to schema dir * add comment * add comment * remove git moduels * add incidently deleted file * fix typo * remove SetMetadataCommand * revert change * merge from master
382 lines
14 KiB
Java
382 lines
14 KiB
Java
/*
|
|
|
|
Copyright 2010,2012 Google Inc.
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are
|
|
met:
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above
|
|
copyright notice, this list of conditions and the following disclaimer
|
|
in the documentation and/or other materials provided with the
|
|
distribution.
|
|
* Neither the name of Google Inc. nor the names of its
|
|
contributors may be used to endorse or promote products derived from
|
|
this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
package com.google.refine.importers;
|
|
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.io.Serializable;
|
|
import java.util.List;
|
|
|
|
import com.fasterxml.jackson.core.JsonFactory;
|
|
import com.fasterxml.jackson.core.JsonParseException;
|
|
import com.fasterxml.jackson.core.JsonParser;
|
|
import com.fasterxml.jackson.core.JsonParser.NumberType;
|
|
import com.fasterxml.jackson.core.JsonToken;
|
|
import org.json.JSONArray;
|
|
import org.json.JSONObject;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
import com.google.refine.importers.tree.ImportColumnGroup;
|
|
import com.google.refine.importers.tree.TreeImportingParserBase;
|
|
import com.google.refine.importers.tree.TreeReader;
|
|
import com.google.refine.importers.tree.TreeReaderException;
|
|
import com.google.refine.importing.ImportingJob;
|
|
import com.google.refine.importing.ImportingUtilities;
|
|
import com.google.refine.model.Project;
|
|
import com.google.refine.model.medadata.ProjectMetadata;
|
|
import com.google.refine.util.JSONUtilities;
|
|
|
|
public class JsonImporter extends TreeImportingParserBase {
|
|
static final Logger logger = LoggerFactory.getLogger(JsonImporter.class);
|
|
|
|
public final static String ANONYMOUS = "_";
|
|
|
|
public JsonImporter() {
    // super(true): boolean flag defined by TreeImportingParserBase —
    // presumably selects stream-based (file) parsing; confirm its meaning there.
    super(true);
}
|
|
|
|
/**
 * Mutable token counter threaded through the recursive preview parse so the
 * preview stops after PREVIEW_PARSING_LIMIT tokens.
 */
static private class PreviewParsingState {
    int tokenCount;
}
|
|
|
|
final static private int PREVIEW_PARSING_LIMIT = 1000;
|
|
|
|
@Override
|
|
public JSONObject createParserUIInitializationData(
|
|
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
|
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
|
if (fileRecords.size() > 0) {
|
|
try {
|
|
JSONObject firstFileRecord = fileRecords.get(0);
|
|
File file = ImportingUtilities.getFile(job, firstFileRecord);
|
|
JsonFactory factory = new JsonFactory();
|
|
JsonParser parser = factory.createJsonParser(file);
|
|
|
|
PreviewParsingState state = new PreviewParsingState();
|
|
Object rootValue = parseForPreview(parser, state);
|
|
if (rootValue != null) {
|
|
JSONUtilities.safePut(options, "dom", rootValue);
|
|
}
|
|
} catch (IOException e) {
|
|
logger.error("Error generating parser UI initialization data for JSON file", e);
|
|
}
|
|
}
|
|
|
|
return options;
|
|
}
|
|
|
|
final static private Object parseForPreview(JsonParser parser, PreviewParsingState state, JsonToken token)
|
|
throws JsonParseException, IOException {
|
|
if (token != null) {
|
|
switch (token) {
|
|
case START_ARRAY:
|
|
return parseArrayForPreview(parser, state);
|
|
case START_OBJECT:
|
|
return parseObjectForPreview(parser, state);
|
|
case VALUE_STRING:
|
|
return parser.getText();
|
|
case VALUE_NUMBER_INT:
|
|
return Long.valueOf(parser.getLongValue());
|
|
case VALUE_NUMBER_FLOAT:
|
|
return Double.valueOf(parser.getDoubleValue());
|
|
case VALUE_TRUE:
|
|
return Boolean.TRUE;
|
|
case VALUE_FALSE:
|
|
return Boolean.FALSE;
|
|
case VALUE_NULL:
|
|
return null;
|
|
case END_ARRAY:
|
|
case END_OBJECT:
|
|
case FIELD_NAME:
|
|
case NOT_AVAILABLE:
|
|
case VALUE_EMBEDDED_OBJECT:
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
return null;
|
|
}
|
|
|
|
final static private Object parseForPreview(JsonParser parser, PreviewParsingState state) {
|
|
try {
|
|
JsonToken token = parser.nextToken();
|
|
state.tokenCount++;
|
|
return parseForPreview(parser, state, token);
|
|
} catch (IOException e) {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
final static private JSONObject parseObjectForPreview(JsonParser parser, PreviewParsingState state) {
|
|
JSONObject result = new JSONObject();
|
|
loop:while (state.tokenCount < PREVIEW_PARSING_LIMIT) {
|
|
try {
|
|
JsonToken token = parser.nextToken();
|
|
if (token == null) {
|
|
break;
|
|
}
|
|
state.tokenCount++;
|
|
|
|
switch (token) {
|
|
case FIELD_NAME:
|
|
String fieldName = parser.getText();
|
|
Object fieldValue = parseForPreview(parser, state);
|
|
JSONUtilities.safePut(result, fieldName, fieldValue);
|
|
break;
|
|
case END_OBJECT:
|
|
break loop;
|
|
default:
|
|
break loop;
|
|
}
|
|
} catch (IOException e) {
|
|
break;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
final static private JSONArray parseArrayForPreview(JsonParser parser, PreviewParsingState state) {
|
|
JSONArray result = new JSONArray();
|
|
loop:while (state.tokenCount < PREVIEW_PARSING_LIMIT) {
|
|
try {
|
|
JsonToken token = parser.nextToken();
|
|
if (token == null) {
|
|
break;
|
|
}
|
|
state.tokenCount++;
|
|
|
|
switch (token) {
|
|
case END_ARRAY:
|
|
break loop;
|
|
default:
|
|
Object element = parseForPreview(parser, state, token);
|
|
JSONUtilities.append(result, element);
|
|
}
|
|
} catch (IOException e) {
|
|
break;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
 * Parse one JSON file into the project by wrapping the stream in a
 * JSONTreeReader and delegating to the tree-based overload.
 */
@Override
public void parseOneFile(Project project, ProjectMetadata metadata,
        ImportingJob job, String fileSource, InputStream is,
        ImportColumnGroup rootColumnGroup, int limit, JSONObject options, List<Exception> exceptions) {

    parseOneFile(project, metadata, job, fileSource,
            new JSONTreeReader(is), rootColumnGroup, limit, options, exceptions);

    // NOTE(review): by this point 'is' has already been consumed by the
    // JSONTreeReader above. If the superclass overload also reads the stream,
    // this would re-parse empty input — TODO confirm what
    // TreeImportingParserBase.parseOneFile(InputStream) actually does here.
    super.parseOneFile(project, metadata, job, fileSource, is, rootColumnGroup, limit, options, exceptions);
}
|
|
|
|
static public class JSONTreeReader implements TreeReader {
|
|
final static Logger logger = LoggerFactory.getLogger("JsonParser");
|
|
|
|
JsonFactory factory = new JsonFactory();
|
|
JsonParser parser = null;
|
|
|
|
private JsonToken current = null;
|
|
private JsonToken next = null;
|
|
private String fieldName = ANONYMOUS;
|
|
private Serializable fieldValue = null;
|
|
|
|
|
|
/**
 * Create a reader over the given JSON stream and prime it by reading the
 * first token into 'next'. On failure the reader is left empty
 * (hasNext() == false) rather than throwing.
 *
 * @param is the JSON input stream; ownership stays with the caller
 */
public JSONTreeReader(InputStream is) {
    try {
        parser = factory.createJsonParser(is);
        current = null;
        next = parser.nextToken();
    } catch (IOException e) {
        // Use the class logger instead of e.printStackTrace() so the failure
        // reaches the application log like every other error in this file.
        logger.error("Error creating JSON parser from input stream", e);
    }
}
|
|
|
|
/**
 * Always 0: JSON has no attributes, so everything is treated as elements.
 */
@Override
public int getAttributeCount() {
    return 0;
}

/**
 * Always null: JSON has no attributes.
 */
@Override
public String getAttributeLocalName(int index) {
    return null;
}

/**
 * Always null: JSON has no attributes (and no namespace prefixes).
 */
@Override
public String getAttributePrefix(int index) {
    return null;
}

/**
 * Always null: JSON has no attributes.
 */
@Override
public String getAttributeValue(int index) {
    return null;
}
|
|
|
|
@Override
|
|
public Token current() {
|
|
if (current != null) {
|
|
return this.mapToToken(current);
|
|
} else {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
/**
 * Name of the field the current value belongs to; ANONYMOUS ("_") for
 * values not directly under a field name (maintained by next()).
 */
@Override
public String getFieldName() throws TreeReaderException {
    return fieldName;
}
|
|
|
|
/**
 * Always null: JSON has no namespace prefixes.
 */
@Override
public String getPrefix() {
    return null;
}
|
|
|
|
@Override
|
|
public String getFieldValue() throws TreeReaderException {
|
|
return fieldValue.toString();
|
|
}
|
|
|
|
/**
 * The cached scalar value of the current token (set by next()), or null
 * for structural tokens and JSON nulls.
 */
@Override
public Serializable getValue()
        throws TreeReaderException {
    return fieldValue;
}

/** True while at least one more token remains in the stream. */
@Override
public boolean hasNext() {
    return next != null;
}
|
|
|
|
private Serializable getValue(JsonParser parser, JsonToken token) throws IOException {
|
|
if (token != null) {
|
|
switch (token) {
|
|
case VALUE_STRING:
|
|
return parser.getText();
|
|
case VALUE_NUMBER_INT:
|
|
if (parser.getNumberType() == NumberType.INT || parser.getNumberType() == NumberType.LONG) {
|
|
return Long.valueOf(parser.getLongValue());
|
|
} else {
|
|
return parser.getNumberValue();
|
|
}
|
|
case VALUE_NUMBER_FLOAT:
|
|
if (parser.getNumberType() == NumberType.FLOAT) {
|
|
return Float.valueOf(parser.getFloatValue());
|
|
} else if (parser.getNumberType() == NumberType.DOUBLE) {
|
|
return Double.valueOf(parser.getDoubleValue());
|
|
} else {
|
|
return parser.getNumberValue();
|
|
}
|
|
case VALUE_TRUE:
|
|
return Boolean.TRUE;
|
|
case VALUE_FALSE:
|
|
return Boolean.FALSE;
|
|
case VALUE_NULL:
|
|
return null;
|
|
case END_ARRAY:
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
return null;
|
|
}
|
|
|
|
/**
 * Advance the reader one token and refresh the cached fieldName/fieldValue
 * state. The previous/current/next shuffle at the top must happen in this
 * exact order: 'next' is cleared first so that an exception from the parser
 * leaves hasNext() false.
 */
@Override
public Token next() throws TreeReaderException {
    JsonToken previous = current;
    current = next;
    next = null; // in case an exception is thrown
    try {
        if (current != null) {
            if (current.isScalarValue()) {
                // Cache the scalar so getValue()/getFieldValue() need not
                // touch the parser again.
                fieldValue = getValue(parser,current);
            } else {
                fieldValue = null;
            }
            if (current == JsonToken.FIELD_NAME) {
                fieldName = parser.getText();
            } else if (current == JsonToken.START_ARRAY
                    || current == JsonToken.START_OBJECT) {
                // Use current field name for next level object
                // ie elide one level of anonymous fields
                if (previous != JsonToken.FIELD_NAME) {
                    fieldName = ANONYMOUS;
                }
            }
        }
        next = parser.nextToken();
    } catch (IOException e) {
        throw new TreeReaderException(e);
    }
    return current();
}
|
|
|
|
protected Token mapToToken(JsonToken token){
|
|
switch(token){
|
|
case START_ARRAY: return Token.StartEntity;
|
|
case END_ARRAY: return Token.EndEntity;
|
|
case START_OBJECT: return Token.StartEntity;
|
|
case END_OBJECT: return Token.EndEntity;
|
|
case VALUE_STRING: return Token.Value;
|
|
case FIELD_NAME: return Token.Ignorable; //returned by the getLocalName function()
|
|
case VALUE_NUMBER_INT: return Token.Value;
|
|
//Json does not have START_DOCUMENT token type (so ignored as default)
|
|
//Json does not have END_DOCUMENT token type (so ignored as default)
|
|
case VALUE_TRUE : return Token.Value;
|
|
case VALUE_NUMBER_FLOAT : return Token.Value;
|
|
case VALUE_NULL : return Token.Value;
|
|
case VALUE_FALSE : return Token.Value;
|
|
case VALUE_EMBEDDED_OBJECT : return Token.Ignorable;
|
|
case NOT_AVAILABLE : return Token.Ignorable;
|
|
default: return Token.Ignorable;
|
|
}
|
|
}
|
|
}
|
|
}
|