Revert "data package metadata (#1398)"

This reverts commit c4b0ff6bea.
This commit is contained in:
Antonin Delpeuch 2018-11-17 20:42:34 +00:00
parent ac444b8b50
commit 5a4a79028f
140 changed files with 705 additions and 55830 deletions

View File

@ -47,6 +47,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.RefineServlet;
import com.google.refine.commands.HttpUtilities;
import com.google.refine.extension.database.model.DatabaseColumn;
@ -56,7 +57,6 @@ import com.google.refine.importing.ImportingController;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingManager;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;

View File

@ -25,6 +25,7 @@ import org.testng.annotations.Parameters;
import org.testng.annotations.Test;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.RefineServlet;
import com.google.refine.extension.database.mysql.MySQLDatabaseService;
import com.google.refine.extension.database.stub.RefineDbServletStub;
@ -32,7 +33,6 @@ import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingManager;
import com.google.refine.io.FileProjectManager;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;

View File

@ -25,6 +25,7 @@ import org.testng.annotations.Parameters;
import org.testng.annotations.Test;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.RefineServlet;
import com.google.refine.extension.database.DBExtensionTestUtils;
import com.google.refine.extension.database.DBExtensionTests;
@ -35,7 +36,6 @@ import com.google.refine.extension.database.stub.RefineDbServletStub;
import com.google.refine.importing.ImportingManager;
import com.google.refine.io.FileProjectManager;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
public class SavedConnectionCommandTest extends DBExtensionTests{

View File

@ -37,11 +37,12 @@ import com.google.api.services.fusiontables.Fusiontables;
import com.google.api.services.fusiontables.model.Column;
import com.google.api.services.fusiontables.model.Sqlresponse;
import com.google.api.services.fusiontables.model.Table;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TabularImportingParserBase;
import com.google.refine.importers.TabularImportingParserBase.TableDataReader;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
/**

View File

@ -14,11 +14,11 @@ import com.google.api.services.sheets.v4.Sheets;
import com.google.api.services.sheets.v4.model.Sheet;
import com.google.api.services.sheets.v4.model.Spreadsheet;
import com.google.api.services.sheets.v4.model.ValueRange;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TabularImportingParserBase;
import com.google.refine.importers.TabularImportingParserBase.TableDataReader;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
public class GDataImporter {

View File

@ -30,6 +30,7 @@ import com.google.api.services.sheets.v4.Sheets;
import com.google.api.services.sheets.v4.model.Sheet;
import com.google.api.services.sheets.v4.model.Spreadsheet;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.RefineServlet;
import com.google.refine.commands.HttpUtilities;
import com.google.refine.importing.DefaultImportingController;
@ -37,7 +38,6 @@ import com.google.refine.importing.ImportingController;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingManager;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;

View File

@ -41,10 +41,10 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.TabularImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;

View File

@ -1,213 +0,0 @@
{
"version": "1.0.0",
"errors": {
"io-error": {
"name": "IO Error",
"type": "source",
"context": "table",
"weight": 100,
"message": "The data source returned an IO Error of type {error_type}",
"description": "Data reading error because of IO error.\n\n How it could be resolved:\n - Fix path if it's not correct."
},
"http-error": {
"name": "HTTP Error",
"type": "source",
"context": "table",
"weight": 100,
"message": "The data source returned an HTTP error with a status code of {status_code}",
"description": "Data reading error because of HTTP error.\n\n How it could be resolved:\n - Fix url link if it's not correct."
},
"source-error": {
"name": "Source Error",
"type": "source",
"context": "table",
"weight": 100,
"message": "The data source has not supported or has inconsistent contents; no tabular data can be extracted",
"description": "Data reading error because of not supported or inconsistent contents.\n\n How it could be resolved:\n - Fix data contents (e.g. change JSON data to array or arrays/objects).\n - Set correct source settings in {validator}."
},
"scheme-error": {
"name": "Scheme Error",
"type": "source",
"context": "table",
"weight": 100,
"message": "The data source is in an unknown scheme; no tabular data can be extracted",
"description": "Data reading error because of incorrect scheme.\n\n How it could be resolved:\n - Fix data scheme (e.g. change scheme from `ftp` to `http`).\n - Set correct scheme in {validator}."
},
"format-error": {
"name": "Format Error",
"type": "source",
"context": "table",
"weight": 100,
"message": "The data source is in an unknown format; no tabular data can be extracted",
"description": "Data reading error because of incorrect format.\n\n How it could be resolved:\n - Fix data format (e.g. change file extension from `txt` to `csv`).\n - Set correct format in {validator}."
},
"encoding-error": {
"name": "Encoding Error",
"type": "source",
"context": "table",
"weight": 100,
"message": "The data source could not be successfully decoded with {encoding} encoding",
"description": "Data reading error because of an encoding problem.\n\n How it could be resolved:\n - Fix data source if it's broken.\n - Set correct encoding in {validator}."
},
"blank-header": {
"name": "Blank Header",
"type": "structure",
"context": "head",
"weight": 3,
"message": "Header in column {column_number} is blank",
"description": "A column in the header row is missing a value. Column names should be provided.\n\n How it could be resolved:\n - Add the missing column name to the first row of the data source.\n - If the first row starts with, or ends with a comma, remove it.\n - If this error should be ignored disable `blank-header` check in {validator}."
},
"duplicate-header": {
"name": "Duplicate Header",
"type": "structure",
"context": "head",
"weight": 3,
"message": "Header in column {column_number} is duplicated to header in column(s) {column_numbers}",
"description": "Two columns in the header row have the same value. Column names should be unique.\n\n How it could be resolved:\n - Add the missing column name to the first row of the data.\n - If the first row starts with, or ends with a comma, remove it.\n - If this error should be ignored disable `duplicate-header` check in {validator}."
},
"blank-row": {
"name": "Blank Row",
"type": "structure",
"context": "body",
"weight": 9,
"message": "Row {row_number} is completely blank",
"description": "This row is empty. A row should contain at least one value.\n\n How it could be resolved:\n - Delete the row.\n - If this error should be ignored disable `blank-row` check in {validator}."
},
"duplicate-row": {
"name": "Duplicate Row",
"type": "structure",
"context": "body",
"weight": 5,
"message": "Row {row_number} is duplicated to row(s) {row_numbers}",
"description": "The exact same data has been seen in another row.\n\n How it could be resolved:\n - If some of the data is incorrect, correct it.\n - If the whole row is an incorrect duplicate, remove it.\n - If this error should be ignored disable `duplicate-row` check in {validator}."
},
"extra-value": {
"name": "Extra Value",
"type": "structure",
"context": "body",
"weight": 9,
"message": "Row {row_number} has an extra value in column {column_number}",
"description": "This row has more values compared to the header row (the first row in the data source). A key concept is that all the rows in tabular data must have the same number of columns.\n\n How it could be resolved:\n - Check data has an extra comma between the values in this row.\n - If this error should be ignored disable `extra-value` check in {validator}."
},
"missing-value": {
"name": "Missing Value",
"type": "structure",
"context": "body",
"weight": 9,
"message": "Row {row_number} has a missing value in column {column_number}",
"description": "This row has less values compared to the header row (the first row in the data source). A key concept is that all the rows in tabular data must have the same number of columns.\n\n How it could be resolved:\n - Check data is not missing a comma between the values in this row.\n - If this error should be ignored disable `missing-value` check in {validator}."
},
"schema-error": {
"name": "Table Schema Error",
"type": "schema",
"context": "table",
"weight": 15,
"message": "Table Schema error: {error_message}",
"description": "Provided schema is not valid.\n\n How it could be resolved:\n - Update schema descriptor to be a valid descriptor\n - If this error should be ignored disable schema checks in {validator}."
},
"non-matching-header": {
"name": "Non-Matching Header",
"type": "schema",
"context": "head",
"weight": 9,
"message": "Header in column {column_number} doesn't match field name {field_name} in the schema",
"description": "One of the data source headers doesn't match the field name defined in the schema.\n\n How it could be resolved:\n - Rename header in the data source or field in the schema\n - If this error should be ignored disable `non-matching-header` check in {validator}."
},
"extra-header": {
"name": "Extra Header",
"type": "schema",
"context": "head",
"weight": 9,
"message": "There is an extra header in column {column_number}",
"description": "The first row of the data source contains header that doesn't exist in the schema.\n\n How it could be resolved:\n - Remove the extra column from the data source or add the missing field to the schema\n - If this error should be ignored disable `extra-header` check in {validator}."
},
"missing-header": {
"name": "Missing Header",
"type": "schema",
"context": "head",
"weight": 9,
"message": "There is a missing header in column {column_number}",
"description": "Based on the schema there should be a header that is missing in the first row of the data source.\n\n How it could be resolved:\n - Add the missing column to the data source or remove the extra field from the schema\n - If this error should be ignored disable `missing-header` check in {validator}."
},
"type-or-format-error": {
"name": "Type or Format Error",
"type": "schema",
"context": "body",
"weight": 9,
"message": "The value {value} in row {row_number} and column {column_number} is not type {field_type} and format {field_format}",
"description": "The value does not match the schema type and format for this field.\n\n How it could be resolved:\n - If this value is not correct, update the value.\n - If this value is correct, adjust the type and/or format.\n - To ignore the error, disable the `type-or-format-error` check in {validator}. In this case all schema checks for row values will be ignored."
},
"required-constraint": {
"name": "Required Constraint",
"type": "schema",
"context": "body",
"weight": 9,
"message": "Column {column_number} is a required field, but row {row_number} has no value",
"description": "This field is a required field, but it contains no value.\n\n How it could be resolved:\n - If this value is not correct, update the value.\n - If value is correct, then remove the `required` constraint from the schema.\n - If this error should be ignored disable `required-constraint` check in {validator}."
},
"pattern-constraint": {
"name": "Pattern Constraint",
"type": "schema",
"context": "body",
"weight": 7,
"message": "The value {value} in row {row_number} and column {column_number} does not conform to the pattern constraint of {constraint}",
"description": "This field value should conform to constraint pattern.\n\n How it could be resolved:\n - If this value is not correct, update the value.\n - If value is correct, then remove or refine the `pattern` constraint in the schema.\n - If this error should be ignored disable `pattern-constraint` check in {validator}."
},
"unique-constraint": {
"name": "Unique Constraint",
"type": "schema",
"context": "body",
"weight": 9,
"message": "Rows {row_numbers} has unique constraint violation in column {column_number}",
"description": "This field is a unique field but it contains a value that has been used in another row.\n\n How it could be resolved:\n - If this value is not correct, update the value.\n - If value is correct, then the values in this column are not unique. Remove the `unique` constraint from the schema.\n - If this error should be ignored disable `unique-constraint` check in {validator}."
},
"enumerable-constraint": {
"name": "Enumerable Constraint",
"type": "schema",
"context": "body",
"weight": 7,
"message": "The value {value} in row {row_number} and column {column_number} does not conform to the given enumeration: {constraint}",
"description": "This field value should be equal to one of the values in the enumeration constraint.\n\n How it could be resolved:\n - If this value is not correct, update the value.\n - If value is correct, then remove or refine the `enum` constraint in the schema.\n - If this error should be ignored disable `enumerable-constraint` check in {validator}."
},
"minimum-constraint": {
"name": "Minimum Constraint",
"type": "schema",
"context": "body",
"weight": 7,
"message": "The value {value} in row {row_number} and column {column_number} does not conform to the minimum constraint of {constraint}",
"description": "This field value should be greater or equal than constraint value.\n\n How it could be resolved:\n - If this value is not correct, update the value.\n - If value is correct, then remove or refine the `minimum` constraint in the schema.\n - If this error should be ignored disable `minimum-constraint` check in {validator}."
},
"maximum-constraint": {
"name": "Maximum Constraint",
"type": "schema",
"context": "body",
"weight": 7,
"message": "The value {value} in row {row_number} and column {column_number} does not conform to the maximum constraint of {constraint}",
"description": "This field value should be less or equal than constraint value.\n\n How it could be resolved:\n - If this value is not correct, update the value.\n - If value is correct, then remove or refine the `maximum` constraint in the schema.\n - If this error should be ignored disable `maximum-constraint` check in {validator}."
},
"minimum-length-constraint": {
"name": "Minimum Length Constraint",
"type": "schema",
"context": "body",
"weight": 7,
"message": "The value {value} in row {row_number} and column {column_number} does not conform to the minimum length constraint of {constraint}",
"description": "A lenght of this field value should be greater or equal than schema constraint value.\n\n How it could be resolved:\n - If this value is not correct, update the value.\n - If value is correct, then remove or refine the `minimumLength` constraint in the schema.\n - If this error should be ignored disable `minimum-length-constraint` check in {validator}."
},
"maximum-length-constraint": {
"name": "Maximum Length Constraint",
"type": "schema",
"context": "body",
"weight": 7,
"message": "The value {value} in row {row_number} and column {column_number} does not conform to the maximum length constraint of {constraint}",
"description": "A lenght of this field value should be less or equal than schema constraint value.\n\n How it could be resolved:\n - If this value is not correct, update the value.\n - If value is correct, then remove or refine the `maximumLength` constraint in the schema.\n - If this error should be ignored disable `maximum-length-constraint` check in {validator}."
}
}
}

View File

@ -1,16 +0,0 @@
{
"image": "",
"license": "",
"last_updated": "",
"keywords": [],
"sources": [{
"web": "",
"name": "",
"title": ""
}],
"name": "",
"description": "",
"resources": [],
"title": "",
"version": ""
}

View File

@ -37,7 +37,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
@ -46,7 +45,7 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.tools.tar.TarOutputStream;
import org.json.JSONArray;
import org.json.JSONException;
@ -58,8 +57,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.refine.history.HistoryEntryManager;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.IMetadata;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.preference.PreferenceStore;
import com.google.refine.preference.TopList;
@ -78,6 +75,7 @@ public abstract class ProjectManager {
// Don't spend more than this much time saving projects if doing a quick save
static protected final int QUICK_SAVE_MAX_TIME = 1000 * 30; // 30 secs
protected Map<Long, ProjectMetadata> _projectsMetadata;
protected Map<String, Integer> _projectsTags;// TagName, number of projects having that tag
protected PreferenceStore _preferenceStore;
@ -103,8 +101,8 @@ public abstract class ProjectManager {
transient protected Map<Long, Project> _projects;
static public ProjectManager singleton;
protected ProjectManager() {
protected ProjectManager(){
_projectsMetadata = new HashMap<Long, ProjectMetadata>();
_preferenceStore = new PreferenceStore();
_projects = new HashMap<Long, Project>();
@ -195,7 +193,7 @@ public abstract class ProjectManager {
} catch (Exception e) {
e.printStackTrace();
}
}
}//FIXME what should be the behaviour if metadata is null? i.e. not found
Project project = getProject(id);
if (project != null && metadata != null && metadata.getModified().isAfter(project.getLastSave())) {
@ -204,7 +202,8 @@ public abstract class ProjectManager {
} catch (Exception e) {
e.printStackTrace();
}
}
}//FIXME what should be the behaviour if project is null? i.e. not found or loaded.
//FIXME what should happen if the metadata is found, but not the project? or vice versa?
}
}
@ -215,7 +214,7 @@ public abstract class ProjectManager {
* @param projectId
* @throws Exception
*/
public abstract void saveMetadata(IMetadata metadata, long projectId) throws Exception;
public abstract void saveMetadata(ProjectMetadata metadata, long projectId) throws Exception;
/**
* Save project to the data store
@ -268,23 +267,23 @@ public abstract class ProjectManager {
Project project = _projects.get(id); // don't call getProject() as that will load the project.
if (project != null) {
LocalDateTime projectLastSaveTime = project.getLastSave();
boolean hasUnsavedChanges =
!metadata.getModified().isBefore(projectLastSaveTime);
metadata.getModified().atZone(ZoneId.systemDefault()).toInstant().toEpochMilli() >= project.getLastSave().atZone(ZoneId.systemDefault()).toInstant().toEpochMilli();
// We use >= instead of just > to avoid the case where a newly created project
// has the same modified and last save times, resulting in the project not getting
// saved at all.
if (hasUnsavedChanges) {
long msecsOverdue = ChronoUnit.MILLIS.between(projectLastSaveTime, startTimeOfSave);
long msecsOverdue = startTimeOfSave.atZone(ZoneId.systemDefault()).toInstant().toEpochMilli() - project.getLastSave().atZone(ZoneId.systemDefault()).toInstant().toEpochMilli();
records.add(new SaveRecord(project, msecsOverdue));
} else if (!project.getProcessManager().hasPending()
&& ChronoUnit.MILLIS.between(projectLastSaveTime, startTimeOfSave) > PROJECT_FLUSH_DELAY) {
&& startTimeOfSave.atZone(ZoneId.systemDefault()).toInstant().toEpochMilli() - project.getLastSave().atZone(ZoneId.systemDefault()).toInstant().toEpochMilli() > PROJECT_FLUSH_DELAY) {
/*
* It's been a while since the project was last saved and it hasn't been
* modified. We can safely remove it from the cache to save some memory.
* It's been a while since the project was last saved and it hasn't been
* modified. We can safely remove it from the cache to save some memory.
*/
_projects.remove(id).dispose();
}
@ -310,10 +309,13 @@ public abstract class ProjectManager {
"Saving all modified projects ..." :
"Saving some modified projects ..."
);
for (int i = 0;i < records.size() &&
(allModified || (ChronoUnit.MILLIS.between(startTimeOfSave, LocalDateTime.now()) < QUICK_SAVE_MAX_TIME));
for (int i = 0;
i < records.size() &&
(allModified || (LocalDateTime.now().atZone(ZoneId.systemDefault()).toInstant().toEpochMilli() -
startTimeOfSave.atZone(ZoneId.systemDefault()).toInstant().toEpochMilli() < QUICK_SAVE_MAX_TIME));
i++) {
try {
saveProject(records.get(i).project);
} catch (Exception e) {
@ -352,14 +354,14 @@ public abstract class ProjectManager {
/**
* Gets the project metadata from memory
* Requires that the metadata has already been loaded from the data store.
* Requires that the metadata has already been loaded from the data store
* @param id
* @return
*/
public ProjectMetadata getProjectMetadata(long id) {
return _projectsMetadata.get(id);
}
/**
* Gets the project metadata from memory
* Requires that the metadata has already been loaded from the data store
@ -369,7 +371,7 @@ public abstract class ProjectManager {
public ProjectMetadata getProjectMetadata(String name) {
for (ProjectMetadata pm : _projectsMetadata.values()) {
if (pm.getName().equals(name)) {
return pm;
return pm;
}
}
return null;
@ -421,7 +423,7 @@ public abstract class ProjectManager {
userMetadataPreference = new JSONArray(userMeta);
} catch (JSONException e1) {
logger.warn("wrong definition of userMetadata format. Please use form [{\"name\": \"client name\", \"display\":true}, {\"name\": \"progress\", \"display\":false}]");
logger.error(ExceptionUtils.getStackTrace(e1));
logger.error(ExceptionUtils.getFullStackTrace(e1));
}
for (int index = 0; index < userMetadataPreference.length(); index++) {
@ -466,7 +468,7 @@ public abstract class ProjectManager {
JSONObject projectMetaJsonObj = jsonObjArray.getJSONObject(index);
projectMetaJsonObj.put("display", false);
} catch (JSONException e) {
logger.error(ExceptionUtils.getStackTrace(e));
logger.error(ExceptionUtils.getFullStackTrace(e));
}
}
}
@ -486,7 +488,7 @@ public abstract class ProjectManager {
/**
* Gets all the project tags currently held in memory
*
*
* @return
*/
@JsonIgnore
@ -494,7 +496,6 @@ public abstract class ProjectManager {
return _projectsTags;
}
/**
* Gets the required project from the data store
* If project does not already exist in memory, it is loaded from the data store
@ -603,9 +604,8 @@ public abstract class ProjectManager {
*
* @param ps
*/
public static void preparePreferenceStore(PreferenceStore ps) {
static protected void preparePreferenceStore(PreferenceStore ps) {
ps.put("scripting.expressions", new TopList(s_expressionHistoryMax));
ps.put("scripting.starred-expressions", new TopList(Integer.MAX_VALUE));
}
}

View File

@ -0,0 +1,319 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.lang.reflect.Field;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.preference.PreferenceStore;
import com.google.refine.util.ParsingUtilities;
public class ProjectMetadata {
public final static String DEFAULT_FILE_NAME = "metadata.json";
public final static String TEMP_FILE_NAME = "metadata.temp.json";
public final static String OLD_FILE_NAME = "metadata.old.json";
private final LocalDateTime _created;
private LocalDateTime _modified;
private LocalDateTime written = null;
private String _name = "";
private String _password = "";
private String _encoding = "";
private int _encodingConfidence;
private String[] _tags = new String[0];
private String _creator = "";
private String _contributors = "";
private String _subject = ""; // Several refine projects may be linked
private String _description = ""; // free form of comment
private int _rowCount; // at the creation. Essential for cleaning old projects too heavy
// import options is an array for 1-n data sources
private JSONArray _importOptionMetadata = new JSONArray();
// user metadata
private JSONArray _userMetadata = new JSONArray();
private Map<String, Serializable> _customMetadata = new HashMap<String, Serializable>();
private PreferenceStore _preferenceStore = new PreferenceStore();
private final static Logger logger = LoggerFactory.getLogger("project_metadata");
protected ProjectMetadata(LocalDateTime date) {
_created = date;
preparePreferenceStore(_preferenceStore);
}
public ProjectMetadata() {
this(LocalDateTime.now());
_modified = _created;
}
public ProjectMetadata(LocalDateTime created, LocalDateTime modified, String name) {
this(created);
_modified = modified;
_name = name;
}
private boolean isSaveMode(Properties options) {
return "save".equals(options.getProperty("mode"));
}
public boolean isDirty() {
return written == null || _modified.isAfter(written);
}
static protected void preparePreferenceStore(PreferenceStore ps) {
ProjectManager.preparePreferenceStore(ps);
// Any project specific preferences?
}
public LocalDateTime getCreated() {
return _created;
}
public void setName(String name) {
this._name = name;
updateModified();
}
public String getName() {
return _name;
}
public void setEncoding(String encoding) {
this._encoding = encoding;
updateModified();
}
public String getEncoding() {
return _encoding;
}
public void setEncodingConfidence(int confidence) {
this._encodingConfidence = confidence;
updateModified();
}
public void setEncodingConfidence(String confidence) {
if (confidence != null) {
this.setEncodingConfidence(Integer.parseInt(confidence));
}
}
public int getEncodingConfidence() {
return _encodingConfidence;
}
public void setTags(String[] tags) {
if (tags != null) {
List<String> tmpTags = new ArrayList<String>(tags.length);
for (String tag : tags) {
if (tag != null) {
String trimmedTag = tag.trim();
if (!trimmedTag.isEmpty()) {
tmpTags.add(trimmedTag);
}
}
}
this._tags = tmpTags.toArray(new String[tmpTags.size()]);
} else {
this._tags = tags;
}
updateModified();
}
public String[] getTags() {
if (_tags == null) this._tags = new String[0];
return _tags;
}
public void setPassword(String password) {
this._password = password;
updateModified();
}
public String getPassword() {
return _password;
}
public LocalDateTime getModified() {
return _modified;
}
public void updateModified() {
_modified = LocalDateTime.now();
}
public PreferenceStore getPreferenceStore() {
return _preferenceStore;
}
public Serializable getCustomMetadata(String key) {
return _customMetadata.get(key);
}
public void setCustomMetadata(String key, Serializable value) {
if (value == null) {
_customMetadata.remove(key);
} else {
_customMetadata.put(key, value);
}
updateModified();
}
public JSONArray getImportOptionMetadata() {
return _importOptionMetadata;
}
public void setImportOptionMetadata(JSONArray jsonArray) {
_importOptionMetadata = jsonArray;
updateModified();
}
public void appendImportOptionMetadata(ObjectNode options) {
_importOptionMetadata.put(options);
updateModified();
}
public String getCreator() {
return _creator;
}
public void setCreator(String creator) {
this._creator = creator;
updateModified();
}
public String getContributors() {
return _contributors;
}
public void setContributors(String contributors) {
this._contributors = contributors;
updateModified();
}
public String getSubject() {
return _subject;
}
public void setSubject(String subject) {
this._subject = subject;
updateModified();
}
public String getDescription() {
return _description;
}
public void setDescription(String description) {
this._description = description;
updateModified();
}
public int getRowCount() {
return _rowCount;
}
public void setRowCount(int rowCount) {
this._rowCount = rowCount;
updateModified();
}
public JSONArray getUserMetadata() {
return _userMetadata;
}
public void setUserMetadata(JSONArray userMetadata) {
this._userMetadata = userMetadata;
}
private void updateUserMetadata(String metaName, String valueString) {
for (int i = 0; i < _userMetadata.length(); i++) {
try {
JSONObject obj = _userMetadata.getJSONObject(i);
if (obj.getString("name").equals(metaName)) {
obj.put("value", valueString);
}
} catch (JSONException e) {
logger.error(ExceptionUtils.getFullStackTrace(e));
}
}
}
/**
 * Sets a metadata field by name via reflection. Field names map to the
 * private "_"-prefixed fields of this class; when no such field exists the
 * value is routed to the user-defined metadata list instead.
 *
 * @param metaName    field name without the leading underscore (e.g. "creator")
 * @param valueString the new value; for "tags" a comma-separated list
 */
public void setAnyField(String metaName, String valueString) {
    Class<? extends ProjectMetadata> metaClass = this.getClass();
    try {
        Field metaField = metaClass.getDeclaredField("_" + metaName);
        // the tags field holds an array, so the raw string is split on commas
        Object fieldValue = metaName.equals("tags") ? valueString.split(",") : valueString;
        metaField.set(this, fieldValue);
    } catch (NoSuchFieldException e) {
        // not a built-in field: treat it as a user-supplied metadata entry
        updateUserMetadata(metaName, valueString);
    } catch (SecurityException | IllegalArgumentException | IllegalAccessException e) {
        logger.error(ExceptionUtils.getFullStackTrace(e));
    }
}
/**
 * Deserializes a {@link ProjectMetadata} instance from its JSON
 * representation read from the given stream.
 *
 * @param f the input stream containing the JSON serialization
 * @return the parsed metadata
 * @throws IOException if the stream cannot be read or the JSON cannot be
 *         bound to {@code ProjectMetadata}
 */
public static ProjectMetadata loadFromStream(InputStream f) throws IOException {
    return ParsingUtilities.mapper.readValue(f, ProjectMetadata.class);
}
}

View File

@ -37,7 +37,7 @@ import java.util.Iterator;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang.StringUtils;
public class FingerprintKeyer extends Keyer {

View File

@ -52,12 +52,12 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonGenerator;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.RefineServlet;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.EngineConfig;
import com.google.refine.history.HistoryEntry;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.process.Process;
import com.google.refine.util.ParsingUtilities;
@ -196,7 +196,7 @@ public abstract class Command {
* @return
* @throws ServletException
*/
protected ProjectMetadata getMetadata(HttpServletRequest request) throws ServletException {
protected ProjectMetadata getProjectMetadata(HttpServletRequest request) throws ServletException {
if (request == null) {
throw new IllegalArgumentException("parameter 'request' should not be null");
}
@ -320,20 +320,7 @@ public abstract class Command {
w.flush();
w.close();
}
static protected void respondJSONObject(
HttpServletResponse response, JSONObject o)
throws IOException, JSONException {
response.setCharacterEncoding("UTF-8");
response.setHeader("Content-Type", "application/json");
response.setHeader("Cache-Control", "no-cache");
Writer w = response.getWriter();
w.append(o.toString());
w.flush();
w.close();
}
static protected void respondException(HttpServletResponse response, Exception e)
throws IOException, ServletException {

View File

@ -51,7 +51,9 @@ public class SetPreferenceCommand extends Command {
throws ServletException, IOException {
Project project = request.getParameter("project") != null ? getProject(request) : null;
PreferenceStore ps = ProjectManager.singleton.getPreferenceStore();
PreferenceStore ps = project != null ?
project.getMetadata().getPreferenceStore() :
ProjectManager.singleton.getPreferenceStore();
String prefName = request.getParameter("name");
String valueString = request.getParameter("value");

View File

@ -83,7 +83,6 @@ public class GetExpressionHistoryCommand extends Command {
throws ServletException, IOException {
try {
List<String> expressions = toExpressionList(ProjectManager.singleton.getPreferenceStore().get("scripting.expressions"));
Set<String> starredExpressions = new HashSet<String>(((TopList)ProjectManager.singleton.getPreferenceStore().get("scripting.starred-expressions")).getList());
ExpressionsList expressionsList = new ExpressionsList(expressions.stream()

View File

@ -52,9 +52,6 @@ public class LogExpressionCommand extends Command {
try {
String expression = request.getParameter("expression");
((TopList) ProjectManager.singleton.getPreferenceStore().get("scripting.expressions"))
.add(expression);
((TopList) ProjectManager.singleton.getPreferenceStore().get("scripting.expressions"))
.add(expression);

View File

@ -41,8 +41,8 @@ import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.commands.Command;
import com.google.refine.model.metadata.ProjectMetadata;
public class DeleteProjectCommand extends Command {

View File

@ -1,48 +0,0 @@
package com.google.refine.commands.project;
import java.io.IOException;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.everit.json.schema.ValidationException;
import org.json.JSONException;
import com.google.refine.commands.Command;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.IMetadata;
import com.google.refine.model.metadata.MetadataFactory;
import com.google.refine.model.metadata.MetadataFormat;
public class GetMetadataCommand extends Command {
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
try {
Project project;
MetadataFormat metadataFormat;
try {
project = getProject(request);
metadataFormat = MetadataFormat.valueOf(request.getParameter("metadataFormat"));
} catch (ServletException e) {
respond(response, "error", e.getLocalizedMessage());
return;
}
// for now, only the data package metadata is supported.
if (metadataFormat != MetadataFormat.DATAPACKAGE_METADATA) {
respond(response, "error", "metadata format is not supported");
return;
}
IMetadata metadata = MetadataFactory.buildDataPackageMetadata(project);
respondJSON(response, metadata);
} catch (JSONException e) {
respondException(response, e);
} catch (ValidationException e) {
respondException(response, e);
}
}
}

View File

@ -51,9 +51,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.commands.Command;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.ParsingUtilities;
public class ImportProjectCommand extends Command {

View File

@ -1,83 +0,0 @@
package com.google.refine.commands.project;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.zip.GZIPOutputStream;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.io.IOUtils;
import org.apache.tools.tar.TarOutputStream;
import com.google.refine.ProjectManager;
import com.google.refine.browsing.Engine;
import com.google.refine.commands.Command;
import com.google.refine.exporters.CsvExporter;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.DataPackageMetadata;
import com.google.refine.model.metadata.PackageExtension;
public class PackageProjectCommand extends Command {
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
ProjectManager.singleton.setBusy(true);
try {
// get the metadata
String metadata = request.getParameter("metadata");
InputStream in = IOUtils.toInputStream(metadata, "UTF-8");
Project project = getProject(request);
Engine engine = getEngine(request, project);
// ensure project get saved
DataPackageMetadata dpm = new DataPackageMetadata();
dpm.loadFromStream(in);
ProjectManager.singleton.ensureProjectSaved(project.id);
// export project
CsvExporter exporter = new CsvExporter();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
Writer outputStreamWriter = new OutputStreamWriter(baos);
exporter.export(project, null, engine, outputStreamWriter);
OutputStream os = response.getOutputStream();
try {
PackageExtension.saveZip(dpm.getPackage(), baos, os);
response.setHeader("Content-Type", "application/x-gzip");
} finally {
outputStreamWriter.close();
os.close();
}
} catch (Exception e) {
respondException(response, e);
} finally {
ProjectManager.singleton.setBusy(false);
}
}
protected void gzipTarToOutputStream(Project project, OutputStream os) throws IOException {
GZIPOutputStream gos = new GZIPOutputStream(os);
try {
tarToOutputStream(project, gos);
} finally {
gos.close();
}
}
protected void tarToOutputStream(Project project, OutputStream os) throws IOException {
TarOutputStream tos = new TarOutputStream(os);
try {
ProjectManager.singleton.exportProject(project.id, tos);
} finally {
tos.close();
}
}
}

View File

@ -39,8 +39,8 @@ import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import com.google.refine.ProjectMetadata;
import com.google.refine.commands.Command;
import com.google.refine.model.metadata.ProjectMetadata;
public class RenameProjectCommand extends Command {
@Override
@ -49,7 +49,7 @@ public class RenameProjectCommand extends Command {
try {
String name = request.getParameter("name");
ProjectMetadata pm = getMetadata(request);
ProjectMetadata pm = getProjectMetadata(request);
pm.setName(name);

View File

@ -9,14 +9,15 @@ import javax.servlet.http.HttpServletResponse;
import org.json.JSONException;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.commands.Command;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
public class SetProjectMetadataCommand extends Command {
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
Project project = request.getParameter("project") != null ? getProject(request) : null;
String metaName = request.getParameter("name");
String valueString = request.getParameter("value");
@ -32,7 +33,7 @@ public class SetProjectMetadataCommand extends Command {
response.setCharacterEncoding("UTF-8");
response.setHeader("Content-Type", "application/json");
meta.setAnyStringField(metaName, valueString);
meta.setAnyField(metaName, valueString);
ProjectManager.singleton.saveMetadata(meta, project.id);
respond(response, "{ \"code\" : \"ok\" }");

View File

@ -37,9 +37,9 @@ import javax.servlet.http.HttpServletResponse;
import org.json.JSONException;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.commands.Command;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
public class SetProjectTagsCommand extends Command {
@Override

View File

@ -1,42 +0,0 @@
package com.google.refine.commands.project;
import java.io.IOException;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.json.JSONException;
import org.json.JSONObject;
import com.google.refine.ProjectManager;
import com.google.refine.commands.Command;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.validator.ValidateOperation;
import com.google.refine.util.ParsingUtilities;
public class ValidateSchemaCommand extends Command {
@Override
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
ProjectManager.singleton.setBusy(true);
try {
Project project = getProject(request);
JSONObject optionObj = ParsingUtilities.evaluateJsonStringToObject(
request.getParameter("options"));
new ValidateOperation(project, optionObj).startProcess();
respond(response, "{ \"code\" : \"ok\" }");
} catch (JSONException e) {
respondException(response, e);
} catch (ServletException e) {
respond(response, "error", e.getLocalizedMessage());
return;
} finally {
ProjectManager.singleton.setBusy(false);
}
}
}

View File

@ -226,9 +226,9 @@ public class GetRowsCommand extends Command {
}
// metadata refresh for row mode and record mode
if (project.getMetadata() != null) {
project.getMetadata().setRowCount(project.rows.size());
}
if (project.getMetadata() != null) {
project.getMetadata().setRowCount(project.rows.size());
}
} catch (Exception e) {
respondException(response, e);
}

View File

@ -47,8 +47,8 @@ import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonRawValue;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.commands.Command;
import com.google.refine.model.metadata.ProjectMetadata;
public class GetAllProjectMetadataCommand extends Command {
public static class AllProjectMetadata {

View File

@ -37,11 +37,11 @@ import java.io.UnsupportedEncodingException;
import java.util.Properties;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.expr.EvalError;
import com.google.refine.grel.ControlFunctionRegistry;
import com.google.refine.grel.Function;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
public class Reinterpret implements Function {

View File

@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.grel.controls;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang.StringUtils;
public class IsNumeric extends IsTest {
@Override

View File

@ -44,7 +44,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLException;
import org.apache.poi.common.usermodel.Hyperlink;
@ -60,6 +60,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectMetadata;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Cell;
@ -67,7 +68,6 @@ import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.Recon.Judgment;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
@ -192,7 +192,7 @@ public class ExcelImporter extends TabularImportingParserBase {
// value is fileName#sheetIndex
fileNameAndSheetIndex = sheetObj.get("fileNameAndSheetIndex").asText().split("#");
} catch (JSONException e) {
logger.error(ExceptionUtils.getStackTrace(e));
logger.error(ExceptionUtils.getFullStackTrace(e));
}
if (!fileNameAndSheetIndex[0].equals(fileSource))

View File

@ -16,7 +16,9 @@ import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;

View File

@ -44,6 +44,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingParser;
@ -51,7 +52,6 @@ import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;

View File

@ -47,6 +47,7 @@ import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonParser.NumberType;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.BooleanNode;
@ -54,6 +55,8 @@ import com.fasterxml.jackson.databind.node.DoubleNode;
import com.fasterxml.jackson.databind.node.LongNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.TreeImportingParserBase;
import com.google.refine.importers.tree.TreeReader;
@ -61,7 +64,7 @@ import com.google.refine.importers.tree.TreeReaderException;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;

View File

@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
public class LineBasedImporter extends TabularImportingParserBase {

View File

@ -44,7 +44,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.json.JSONException;
import org.odftoolkit.odfdom.doc.OdfDocument;
import org.odftoolkit.odfdom.doc.table.OdfTable;
@ -55,6 +55,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectMetadata;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Cell;
@ -62,7 +63,6 @@ import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.Recon.Judgment;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
@ -151,7 +151,7 @@ public class OdsImporter extends TabularImportingParserBase {
// value is fileName#sheetIndex
fileNameAndSheetIndex = sheetObj.get("fileNameAndSheetIndex").asText().split("#");
} catch (JSONException e) {
logger.error(ExceptionUtils.getStackTrace(e));
logger.error(ExceptionUtils.getFullStackTrace(e));
}
if (!fileNameAndSheetIndex[0].equals(fileSource))

View File

@ -46,6 +46,7 @@ import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectMetadata;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Cell;
@ -53,7 +54,7 @@ import com.google.refine.model.Column;
import com.google.refine.model.ModelException;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.model.metadata.ProjectMetadata;
public class RdfTripleImporter extends ImportingParserBase {
private Mode mode;

View File

@ -52,10 +52,10 @@ import java.util.Map;
import org.apache.commons.lang3.StringEscapeUtils;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectMetadata;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
import au.com.bytecode.opencsv.CSVParser;

View File

@ -40,13 +40,13 @@ import java.util.ArrayList;
import java.util.List;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectMetadata;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
abstract public class TabularImportingParserBase extends ImportingParserBase {

View File

@ -52,20 +52,22 @@ import org.sweble.wikitext.parser.utils.SimpleParserConfig;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.io.CharStreams;
import xtc.parser.ParseException;
import com.google.refine.ProjectMetadata;
import com.google.refine.importing.ImportingJob;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Recon;
import com.google.refine.model.ReconStats;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.model.recon.ReconJob;
import com.google.refine.model.recon.StandardReconConfig;
import com.google.refine.model.recon.StandardReconConfig.ColumnDetail;
import com.google.refine.util.JSONUtilities;
import de.fau.cs.osr.ptk.common.AstVisitor;
import xtc.parser.ParseException;
public class WikitextImporter extends TabularImportingParserBase {

View File

@ -51,6 +51,8 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.tree.ImportColumnGroup;
import com.google.refine.importers.tree.TreeImportingParserBase;
import com.google.refine.importers.tree.TreeReader;
@ -58,7 +60,7 @@ import com.google.refine.importers.tree.TreeReaderException;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;

View File

@ -39,16 +39,16 @@ import java.io.InputStream;
import java.io.Reader;
import java.util.List;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang.NotImplementedException;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectMetadata;
import com.google.refine.importers.ImporterUtilities;
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
import com.google.refine.importers.ImportingParserBase;
import com.google.refine.importing.ImportingJob;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
/**
@ -154,7 +154,7 @@ abstract public class TreeImportingParserBase extends ImportingParserBase {
ObjectNode options,
List<Exception> exceptions
) {
throw new NotImplementedException("project ID:" + project.id);
throw new NotImplementedException();
}
/**

View File

@ -46,8 +46,8 @@ import com.fasterxml.jackson.annotation.JsonRawValue;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
@ -146,15 +146,6 @@ public class ImportingJob {
}
}
/**
* TO check if the file record is a metadata file entry
* @param fileRecordObject
* @return ObjectNode
*/
public boolean isMetadataFileRecord(ObjectNode fileRecordObject) {
return fileRecordObject.has("metaDataFormat");
}
@JsonIgnore
public List<ObjectNode> getSelectedFileRecords() {
List<ObjectNode> results = new ArrayList<ObjectNode>();

View File

@ -37,7 +37,8 @@ import java.util.List;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.ProjectMetadata;
public interface ImportingParser {
/**

View File

@ -42,7 +42,6 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.text.NumberFormat;
@ -50,11 +49,9 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@ -68,14 +65,10 @@ import org.apache.commons.fileupload.ProgressListener;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.apache.commons.fileupload.util.Streams;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DecompressingHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
@ -86,41 +79,20 @@ import org.apache.tools.tar.TarInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.RefineServlet;
import com.google.refine.importing.ImportingManager.Format;
import com.google.refine.importing.UrlRewriter.Result;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.ColumnModel;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.model.metadata.DataPackageMetadata;
import com.google.refine.model.metadata.IMetadata;
import com.google.refine.model.metadata.MetadataFactory;
import com.google.refine.model.metadata.MetadataFormat;
import com.google.refine.model.metadata.PackageExtension;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.preference.PreferenceStore;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
import io.frictionlessdata.datapackage.Package;
import io.frictionlessdata.tableschema.Field;
import io.frictionlessdata.tableschema.Schema;
import io.frictionlessdata.tableschema.TypeInferrer;
import io.frictionlessdata.tableschema.exceptions.TypeInferringException;
public class ImportingUtilities {
final static protected Logger logger = LoggerFactory.getLogger("importing-utilities");
private final static String METADATA_FILE_KEY = "metadataFile";
private static final int INFER_ROW_LIMIT = 100;
static public interface Progress {
public void setProgress(String message, int percent);
public boolean isCanceled();
@ -201,11 +173,11 @@ public class ImportingUtilities {
) throws Exception {
ArrayNode fileRecords = ParsingUtilities.mapper.createArrayNode();
JSONUtilities.safePut(retrievalRecord, "files", fileRecords);
JSONUtilities.safePut(retrievalRecord, "downloadCount", 0);
JSONUtilities.safePut(retrievalRecord, "archiveCount", 0);
int clipboardCount = 0;
int uploadCount = 0;
int downloadCount = 0;
int archiveCount = 0;
// This tracks the total progress, which involves uploading data from the client
// as well as downloading data from URLs.
@ -249,7 +221,7 @@ public class ImportingUtilities {
List<FileItem> tempFiles = (List<FileItem>)upload.parseRequest(request);
progress.setProgress("Uploading data ...", -1);
for (FileItem fileItem : tempFiles) {
parts: for (FileItem fileItem : tempFiles) {
if (progress.isCanceled()) {
break;
}
@ -284,27 +256,107 @@ public class ImportingUtilities {
} else if (name.equals("download")) {
String urlString = Streams.asString(stream);
download(rawDataDir, retrievalRecord, progress, fileRecords, update, urlString);
processDataPackage(retrievalRecord, fileRecords);
} else if (name.equals("data-package")) {
String urlString = Streams.asString(stream);
List<Result> results = null;
URL url = new URL(urlString);
ObjectNode fileRecord = ParsingUtilities.mapper.createObjectNode();
JSONUtilities.safePut(fileRecord, "origin", "download");
JSONUtilities.safePut(fileRecord, "url", urlString);
for (UrlRewriter rewriter : ImportingManager.urlRewriters) {
results = rewriter.rewrite(urlString);
if (results != null) {
for (Result result : results) {
download(rawDataDir, retrievalRecord, progress, fileRecords,
update, result.rewrittenUrl, result.metaDataFormat);
Result result = rewriter.rewrite(urlString);
if (result != null) {
urlString = result.rewrittenUrl;
url = new URL(urlString);
JSONUtilities.safePut(fileRecord, "url", urlString);
JSONUtilities.safePut(fileRecord, "format", result.format);
if (!result.download) {
downloadCount++;
JSONUtilities.append(fileRecords, fileRecord);
continue parts;
}
}
}
if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
DefaultHttpClient client = new DefaultHttpClient();
DecompressingHttpClient httpclient =
new DecompressingHttpClient(client);
HttpGet httpGet = new HttpGet(url.toURI());
httpGet.setHeader("User-Agent", RefineServlet.getUserAgent());
if ("https".equals(url.getProtocol())) {
// HTTPS only - no sending password in the clear over HTTP
String userinfo = url.getUserInfo();
if (userinfo != null) {
int s = userinfo.indexOf(':');
if (s > 0) {
String user = userinfo.substring(0, s);
String pw = userinfo.substring(s + 1, userinfo.length());
client.getCredentialsProvider().setCredentials(
new AuthScope(url.getHost(), 443),
new UsernamePasswordCredentials(user, pw));
}
}
}
HttpResponse response = httpclient.execute(httpGet);
try {
response.getStatusLine();
HttpEntity entity = response.getEntity();
if (entity == null) {
throw new Exception("No content found in " + url.toString());
}
InputStream stream2 = entity.getContent();
String encoding = null;
if (entity.getContentEncoding() != null) {
encoding = entity.getContentEncoding().getValue();
}
JSONUtilities.safePut(fileRecord, "declaredEncoding", encoding);
String contentType = null;
if (entity.getContentType() != null) {
contentType = entity.getContentType().getValue();
}
JSONUtilities.safePut(fileRecord, "declaredMimeType", contentType);
if (saveStream(stream2, url, rawDataDir, progress, update,
fileRecord, fileRecords,
entity.getContentLength())) {
archiveCount++;
}
downloadCount++;
EntityUtils.consume(entity);
} finally {
httpGet.releaseConnection();
}
} else {
// Fallback handling for non HTTP connections (only FTP?)
URLConnection urlConnection = url.openConnection();
urlConnection.setConnectTimeout(5000);
urlConnection.connect();
InputStream stream2 = urlConnection.getInputStream();
JSONUtilities.safePut(fileRecord, "declaredEncoding",
urlConnection.getContentEncoding());
JSONUtilities.safePut(fileRecord, "declaredMimeType",
urlConnection.getContentType());
try {
if (saveStream(stream2, url, rawDataDir, progress,
update, fileRecord, fileRecords,
urlConnection.getContentLength())) {
archiveCount++;
}
downloadCount++;
} finally {
stream2.close();
}
}
} else {
String value = Streams.asString(stream);
parameters.put(name, value);
// TODO: We really want to store this on the request so it's available for everyone
// request.getParameterMap().put(name, value);
}
} else { // is file content
String fileName = fileItem.getName();
if (fileName.length() > 0) {
@ -325,11 +377,9 @@ public class ImportingUtilities {
JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
if (postProcessRetrievedFile(rawDataDir, file, fileRecord, fileRecords, progress)) {
JSONUtilities.safeInc(retrievalRecord, "archiveCount");
archiveCount++;
}
processDataPackage(retrievalRecord, fileRecords);
uploadCount++;
}
}
@ -343,144 +393,9 @@ public class ImportingUtilities {
}
JSONUtilities.safePut(retrievalRecord, "uploadCount", uploadCount);
JSONUtilities.safePut(retrievalRecord, "downloadCount", downloadCount);
JSONUtilities.safePut(retrievalRecord, "clipboardCount", clipboardCount);
}
private static void processDataPackage(ObjectNode retrievalRecord, ArrayNode fileRecords) {
int dataPackageJSONFileIndex = getDataPackageJSONFile(fileRecords);
if (dataPackageJSONFileIndex >= 0) {
ObjectNode dataPackageJSONFile = (ObjectNode) fileRecords.get(dataPackageJSONFileIndex);
JSONUtilities.safePut(dataPackageJSONFile, "metaDataFormat", MetadataFormat.DATAPACKAGE_METADATA.name());
JSONUtilities.safePut(retrievalRecord, METADATA_FILE_KEY, dataPackageJSONFile);
fileRecords.remove(dataPackageJSONFileIndex);
}
}
private static int getDataPackageJSONFile(ArrayNode fileRecords) {
for (int i = 0; i < fileRecords.size(); i++) {
ObjectNode file = JSONUtilities.getObjectElement(fileRecords, i);
if (file.has("archiveFileName") &&
file.has("fileName") &&
file.get("fileName").equals(DataPackageMetadata.DEFAULT_FILE_NAME)) {
return i;
}
}
return -1;
}
private static void download(File rawDataDir, ObjectNode retrievalRecord, final Progress progress,
ArrayNode fileRecords, final SavingUpdate update, String urlString)
throws URISyntaxException, IOException, ClientProtocolException, Exception {
download(rawDataDir, retrievalRecord, progress, fileRecords, update, urlString, null);
}
/**
* @param rawDataDir
* @param retrievalRecord
* @param progress
* @param fileRecords
* @param update
* @param urlString
* @throws URISyntaxException
* @throws IOException
* @throws ClientProtocolException
* @throws Exception
*/
private static void download(File rawDataDir, ObjectNode retrievalRecord, final Progress progress,
ArrayNode fileRecords, final SavingUpdate update, String urlString, String metaDataFormat)
throws URISyntaxException, IOException, ClientProtocolException, Exception {
URL url = new URL(urlString);
ObjectNode fileRecord = ParsingUtilities.mapper.createObjectNode();
JSONUtilities.safePut(fileRecord, "origin", "download");
JSONUtilities.safePut(fileRecord, "url", urlString);
if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
DefaultHttpClient client = new DefaultHttpClient();
DecompressingHttpClient httpclient =
new DecompressingHttpClient(client);
HttpGet httpGet = new HttpGet(url.toURI());
httpGet.setHeader("User-Agent", RefineServlet.getUserAgent());
if ("https".equals(url.getProtocol())) {
// HTTPS only - no sending password in the clear over HTTP
String userinfo = url.getUserInfo();
if (userinfo != null) {
int s = userinfo.indexOf(':');
if (s > 0) {
String user = userinfo.substring(0, s);
String pw = userinfo.substring(s + 1, userinfo.length());
client.getCredentialsProvider().setCredentials(
new AuthScope(url.getHost(), 443),
new UsernamePasswordCredentials(user, pw));
}
}
}
HttpResponse response = httpclient.execute(httpGet);
try {
int code = response.getStatusLine().getStatusCode();
if (code != HttpStatus.SC_OK) {
throw new Exception("HTTP response code: " + code +
" when accessing URL: "+ url.toString());
}
HttpEntity entity = response.getEntity();
if (entity == null) {
throw new Exception("No content found in " + url.toString());
}
InputStream stream2 = entity.getContent();
String encoding = null;
if (entity.getContentEncoding() != null) {
encoding = entity.getContentEncoding().getValue();
}
JSONUtilities.safePut(fileRecord, "declaredEncoding", encoding);
String contentType = null;
if (entity.getContentType() != null) {
contentType = entity.getContentType().getValue();
}
JSONUtilities.safePut(fileRecord, "declaredMimeType", contentType);
if (saveStream(stream2, url, rawDataDir, progress, update,
fileRecord, fileRecords,
entity.getContentLength())) {
JSONUtilities.safeInc(retrievalRecord, "archiveCount");
}
if (metaDataFormat != null) {
JSONUtilities.safePut(fileRecord, "metaDataFormat", metaDataFormat);
JSONUtilities.safePut(retrievalRecord, METADATA_FILE_KEY, fileRecord);
fileRecords.remove(0);
}
JSONUtilities.safeInc(retrievalRecord, "downloadCount");
EntityUtils.consume(entity);
} finally {
httpGet.releaseConnection();
}
} else {
// Fallback handling for non HTTP connections (only FTP?)
URLConnection urlConnection = url.openConnection();
urlConnection.setConnectTimeout(5000);
urlConnection.connect();
InputStream stream2 = urlConnection.getInputStream();
JSONUtilities.safePut(fileRecord, "declaredEncoding",
urlConnection.getContentEncoding());
JSONUtilities.safePut(fileRecord, "declaredMimeType",
urlConnection.getContentType());
try {
if (saveStream(stream2, url, rawDataDir, progress,
update, fileRecord, fileRecords,
urlConnection.getContentLength())) {
JSONUtilities.safeInc(retrievalRecord, "archiveCount");
}
if (metaDataFormat != null)
JSONUtilities.safePut(fileRecord, "metaDataFormat", metaDataFormat);
JSONUtilities.safeInc(retrievalRecord, "downloadCount");
} finally {
stream2.close();
}
}
JSONUtilities.safePut(retrievalRecord, "archiveCount", archiveCount);
}
private static boolean saveStream(InputStream stream, URL url, File rawDataDir, final Progress progress,
@ -1107,30 +1022,8 @@ public class ImportingUtilities {
if (exceptions.size() == 0) {
project.update(); // update all internal models, indexes, caches, etc.
boolean hasMetadataFileRecord = ((ObjectNode)job.getRetrievalRecord()).has(METADATA_FILE_KEY);
if (hasMetadataFileRecord) {
ObjectNode metadataFileRecord = (ObjectNode) job.getRetrievalRecord().get(METADATA_FILE_KEY);
String metadataFormat = JSONUtilities.getString(metadataFileRecord, "metaDataFormat", null);
IMetadata metadata = MetadataFactory.buildMetadata(MetadataFormat.valueOf(metadataFormat));
String relativePath = JSONUtilities.getString(metadataFileRecord, "location", null);
File metadataFile = new File(job.getRawDataDir(), relativePath);
metadata.loadFromFile(metadataFile);
// process the data package metadata
if (MetadataFormat.valueOf(metadataFormat) == MetadataFormat.DATAPACKAGE_METADATA) {
populateDataPackageMetadata(project, pm, (DataPackageMetadata) metadata);
}
logger.info(metadataFileRecord.get("metaDataFormat") + " metadata is set for project " + project.id);
}
ProjectManager.singleton.registerProject(project, pm);
// infer the column type
inferColumnType(project);
job.setProjectID(project.id);
job.setState("created-project");
} else {
@ -1141,101 +1034,10 @@ public class ImportingUtilities {
}
}
public static void inferColumnType(final Project project) {
if (project.columnModel.columns.get(0).getType().isEmpty()) {
List<Object[]> listCells = new ArrayList<Object[]>(INFER_ROW_LIMIT);
List<Row> rows = project.rows
.stream()
.limit(INFER_ROW_LIMIT)
.map(Row::dup)
.collect(Collectors.toList());
// convert the null object to prevent the NPE
for (Row row : rows) {
for (int i = 0; i < row.cells.size(); i++) {
Cell cell = row.cells.get(i);
if (cell == null) {
row.cells.set(i, new Cell(StringUtils.EMPTY, null));
}
}
listCells.add(row.cells.toArray());
}
try {
ObjectNode fieldsJSON = JSONUtilities.jsonObjectToObjectNode(TypeInferrer.getInstance().infer(listCells,
project.columnModel.getColumnNames().toArray(new String[0]),
100));
populateColumnTypes(project.columnModel, JSONUtilities.getArray(fieldsJSON, Schema.JSON_KEY_FIELDS));
} catch (TypeInferringException e) {
logger.error("infer column type exception.", ExceptionUtils.getStackTrace(e));
}
}
}
private static void populateDataPackageMetadata(Project project, ProjectMetadata pmd, DataPackageMetadata metadata) {
// project metadata
ObjectNode pkg = JSONUtilities.jsonObjectToObjectNode(metadata.getPackage().getJson());
pmd.setName(getDataPackageProperty(pkg, Package.JSON_KEY_NAME));
pmd.setDescription(getDataPackageProperty(pkg, PackageExtension.JSON_KEY_DESCRIPTION));
pmd.setTitle(getDataPackageProperty(pkg, PackageExtension.JSON_KEY_TITLE));
pmd.setHomepage(getDataPackageProperty(pkg, PackageExtension.JSON_KEY_HOMEPAGE));
pmd.setImage(getDataPackageProperty(pkg, PackageExtension.JSON_KEY_IMAGE));
pmd.setLicense(getDataPackageProperty(pkg, PackageExtension.JSON_KEY_LICENSE));
pmd.setVersion(getDataPackageProperty(pkg, PackageExtension.JSON_KEY_VERSION));
if (pkg.has(PackageExtension.JSON_KEY_KEYWORKS)) {
String[] tags = JSONUtilities.getStringArray(pkg, PackageExtension.JSON_KEY_KEYWORKS);
pmd.appendTags(tags);
}
// column model
ObjectNode schema = JSONUtilities.jsonObjectToObjectNode(metadata.getPackage().getResources().get(0).getSchema());
if (schema != null) {
populateColumnTypes(project.columnModel, JSONUtilities.getArray(schema, Schema.JSON_KEY_FIELDS));
}
}
private static String getDataPackageProperty(ObjectNode pkg, String key) {
return JSONUtilities.getString(pkg, key, StringUtils.EMPTY);
}
/**
* Populate the column model
* @param columnModel
* @param fieldsJSON
*/
private static void populateColumnTypes(ColumnModel columnModel, ArrayNode fieldsJSON) {
int cellIndex = 0;
Iterator<JsonNode> iter = fieldsJSON.iterator();
while(iter.hasNext()){
ObjectNode fieldJsonObj = (ObjectNode)iter.next();
Field field = new Field(JSONUtilities.objectNodeToJsonNode(fieldJsonObj));
Column column = columnModel.getColumnByCellIndex(cellIndex);
column.setType(field.getType());
column.setFormat(field.getFormat());
column.setDescription(field.getDescription());
column.setTitle(field.getTitle());
column.setConstraints(field.getConstraints());
cellIndex++;
}
}
/**
* Create project metadata. pull the "USER_NAME" from the PreferenceStore as the creator
* @param optionObj
* @return
*/
static public ProjectMetadata createProjectMetadata(ObjectNode optionObj) {
ProjectMetadata pm = new ProjectMetadata();
PreferenceStore ps = ProjectManager.singleton.getPreferenceStore();
pm.setName(JSONUtilities.getString(optionObj, "projectName", "Untitled"));
pm.setTags(JSONUtilities.getStringArray(optionObj, "projectTags"));
pm.setTitle(JSONUtilities.getString(optionObj, "title", ""));
pm.setHomepage(JSONUtilities.getString(optionObj, "homepage", ""));
pm.setImage(JSONUtilities.getString(optionObj, "image", ""));
pm.setLicense(JSONUtilities.getString(optionObj, "license", ""));
String encoding = JSONUtilities.getString(optionObj, "encoding", "UTF-8");
if ("".equals(encoding)) {
@ -1243,12 +1045,6 @@ public class ImportingUtilities {
encoding = "UTF-8";
}
pm.setEncoding(encoding);
if (ps.get(PreferenceStore.USER_NAME) != null) {
String creator = (String) ps.get(PreferenceStore.USER_NAME);
pm.setCreator(creator);
}
return pm;
}
}

View File

@ -33,47 +33,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.importing;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.List;
import com.google.refine.model.metadata.DataPackageUrlRewriter;
/**
* Given a URL rewrittenUrl, the interface will rewrite it into different URLS based on the rewrittenUrl
* The result will be stored in the Result and can be used for download, parsing etc.
* Typical use is to parse the data package json file.
* @see DataPackageUrlRewriter
*/
public interface UrlRewriter {
static public class Result {
public String rewrittenUrl;
public String format;
public boolean download;
public String metaDataFormat;
public Result(String rewrittenUrl, String format, boolean download) {
this.rewrittenUrl = rewrittenUrl;
this.format = format;
this.download = download;
}
public Result(String rewrittenUrl, String format, boolean download, String metaDataFormat) {
this.rewrittenUrl = rewrittenUrl;
this.format = format;
this.download = download;
this.metaDataFormat = metaDataFormat;
}
}
/**
* Parse the url and output the Result
* @param url
* @return
* @throws MalformedURLException
* @throws IOException
*/
public List<Result> rewrite(String url) throws MalformedURLException, IOException;
public boolean filter(String url);
public Result rewrite(String url);
}

View File

@ -57,12 +57,9 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.history.HistoryEntryManager;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.DataPackageMetadata;
import com.google.refine.model.metadata.IMetadata;
import com.google.refine.model.metadata.MetadataFormat;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.preference.PreferenceStore;
import com.google.refine.preference.TopList;
import com.google.refine.util.ParsingUtilities;
@ -127,6 +124,7 @@ public class FileProjectManager extends ProjectManager {
if (metadata == null) {
metadata = ProjectMetadataUtilities.recover(getProjectDir(projectID), projectID);
}
if (metadata != null) {
_projectsMetadata.put(projectID, metadata);
if (_projectsTags == null) {
@ -161,7 +159,7 @@ public class FileProjectManager extends ProjectManager {
untar(destDir, inputStream);
}
}
protected void untar(File destDir, InputStream inputStream) throws IOException {
TarInputStream tin = new TarInputStream(inputStream);
TarEntry tarEntry = null;
@ -237,21 +235,9 @@ public class FileProjectManager extends ProjectManager {
}
@Override
public void saveMetadata(IMetadata metadata, long projectId) throws Exception {
public void saveMetadata(ProjectMetadata metadata, long projectId) throws Exception {
File projectDir = getProjectDir(projectId);
if (metadata.getFormatName() == MetadataFormat.PROJECT_METADATA) {
Project project = ProjectManager.singleton.getProject(projectId);
((ProjectMetadata)metadata).setRowCount(project.rows.size());
ProjectMetadataUtilities.save(metadata, projectDir);
} else if (metadata.getFormatName() == MetadataFormat.DATAPACKAGE_METADATA) {
DataPackageMetadata dp = (DataPackageMetadata)metadata;
FileWriter writer = new FileWriter(new File(projectDir, DataPackageMetadata.DEFAULT_FILE_NAME));
ParsingUtilities.defaultWriter.writeValue(writer, dp);
writer.close();
}
logger.info("metadata saved in " + metadata.getFormatName());
ProjectMetadataUtilities.save(metadata, projectDir);
}
@Override
@ -331,6 +317,8 @@ public class FileProjectManager extends ProjectManager {
return saveWasNeeded;
}
@Override
public void deleteProject(long projectID) {
synchronized (this) {
@ -372,6 +360,8 @@ public class FileProjectManager extends ProjectManager {
protected boolean loadFromFile(File file) {
logger.info("Loading workspace: {}", file.getAbsolutePath());
_projectsMetadata.clear();
boolean found = false;
try {
@ -476,4 +466,4 @@ public class FileProjectManager extends ProjectManager {
_preferenceStore.put("scripting.expressions", newExpressions);
}
}
}
}

View File

@ -35,6 +35,7 @@ package com.google.refine.io;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
@ -46,23 +47,26 @@ import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONTokener;
import org.json.JSONWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectMetadata;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.IMetadata;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.util.ParsingUtilities;
public class ProjectMetadataUtilities {
final static Logger logger = LoggerFactory.getLogger("project_metadata_utilities");
public static void save(IMetadata projectMeta, File projectDir) throws JSONException, IOException {
File tempFile = new File(projectDir, ProjectMetadata.TEMP_FILE_NAME);
public static void save(ProjectMetadata projectMeta, File projectDir) throws JSONException, IOException {
File tempFile = new File(projectDir, "metadata.temp.json");
saveToFile(projectMeta, tempFile);
File file = new File(projectDir, ProjectMetadata.DEFAULT_FILE_NAME);
File oldFile = new File(projectDir, ProjectMetadata.OLD_FILE_NAME);
File file = new File(projectDir, "metadata.json");
File oldFile = new File(projectDir, "metadata.old.json");
if (oldFile.exists()) {
oldFile.delete();
@ -75,7 +79,7 @@ public class ProjectMetadataUtilities {
tempFile.renameTo(file);
}
protected static void saveToFile(IMetadata projectMeta, File metadataFile) throws JSONException, IOException {
protected static void saveToFile(ProjectMetadata projectMeta, File metadataFile) throws JSONException, IOException {
Writer writer = new OutputStreamWriter(new FileOutputStream(metadataFile));
try {
ParsingUtilities.defaultWriter.writeValue(writer, projectMeta);
@ -157,8 +161,8 @@ public class ProjectMetadataUtilities {
}
static protected ProjectMetadata loadFromFile(File metadataFile) throws Exception {
ProjectMetadata projectMetaData = new ProjectMetadata();
projectMetaData.loadFromFile(metadataFile);
return projectMetaData;
FileReader reader = new FileReader(metadataFile);
return ProjectMetadata.loadFromStream(reader);
}
}

View File

@ -36,8 +36,6 @@ package com.google.refine.io;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
@ -47,9 +45,6 @@ import org.slf4j.LoggerFactory;
import com.google.refine.ProjectManager;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.DataPackageMetadata;
import com.google.refine.model.metadata.IMetadata;
import com.google.refine.model.metadata.MetadataFormat;
import com.google.refine.util.Pool;
@ -115,63 +110,38 @@ public class ProjectUtilities {
out.close();
}
}
static public Project loadDataFile(File dir, String dataFile, long id) {
static public Project load(File dir, long id) {
try {
File file = new File(dir, dataFile);
File file = new File(dir, "data.zip");
if (file.exists()) {
return loadFromFile(file, id);
}
} catch (Exception e) {
e.printStackTrace();
}
try {
File file = new File(dir, "data.temp.zip");
if (file.exists()) {
return loadFromFile(file, id);
}
} catch (Exception e) {
e.printStackTrace();
}
try {
File file = new File(dir, "data.old.zip");
if (file.exists()) {
return loadFromFile(file, id);
}
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
static public Project load(File dir, long id) {
Project project =null;
if ((project = loadDataFile(dir, "data.zip", id)) == null) {
if ((project = loadDataFile(dir, "data.temp.zip", id)) == null) {
project = loadDataFile(dir, "data.old.zip", id);
}
}
return project;
}
/**
* scan the folder for json files and read them as metadata
* @param dir
* @param project
*/
public static Map<MetadataFormat, IMetadata> retriveMetadata(File dir) {
// load the metadatas from data folder.
Map<MetadataFormat, IMetadata> metadataMap = new HashMap<MetadataFormat, IMetadata>();
File[] jsons = dir.listFiles(
(folder, file) -> {
return file.toLowerCase().endsWith(".json");
}
);
for (File file : jsons) {
// already loaded
if (file.getName().startsWith("metadata."))
continue;
DataPackageMetadata metadata = new DataPackageMetadata();
// load itself
metadata.loadFromFile(file);
metadataMap.put(MetadataFormat.DATAPACKAGE_METADATA, metadata);
}
return metadataMap;
}
static protected Project loadFromFile(
File file,
long id

View File

@ -39,6 +39,13 @@ import java.lang.reflect.Method;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
@ -50,11 +57,6 @@ import com.google.refine.InterProjectModel;
import com.google.refine.model.recon.ReconConfig;
import com.google.refine.util.ParsingUtilities;
import io.frictionlessdata.tableschema.Field;
import io.frictionlessdata.tableschema.TypeInferrer;
import io.frictionlessdata.tableschema.exceptions.ConstraintsException;
import io.frictionlessdata.tableschema.exceptions.InvalidCastException;
public class Column {
final private int _cellIndex;
final private String _originalName;
@ -64,7 +66,7 @@ public class Column {
// from data package metadata Field.java:
private String type = "";
private String format = Field.FIELD_FORMAT_DEFAULT;
private String format = "";
private String title = "";
private String description = "";
private Map<String, Object> constraints = Collections.emptyMap();
@ -237,23 +239,4 @@ public class Column {
public String toString() {
return _name;
}
public <Any> Any castValue(String value)
throws InvalidCastException, ConstraintsException {
if (this.type.isEmpty()) {
throw new InvalidCastException();
} else {
try {
// Using reflection to invoke appropriate type casting method from the
// TypeInferrer class
String castMethodName = "cast" + (this.type.substring(0, 1).toUpperCase() + this.type.substring(1));
Method method = TypeInferrer.class.getMethod(castMethodName, String.class, String.class, Map.class);
Object castValue = method.invoke(TypeInferrer.getInstance(), this.format, value, null);
return (Any) castValue;
} catch (Exception e) {
throw new InvalidCastException();
}
}
}
}

View File

@ -51,9 +51,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.RefineServlet;
import com.google.refine.history.History;
import com.google.refine.model.metadata.ProjectMetadata;
import com.google.refine.process.ProcessManager;
import com.google.refine.util.ParsingUtilities;
import com.google.refine.util.Pool;
@ -73,13 +73,14 @@ public class Project {
transient private LocalDateTime _lastSave = LocalDateTime.now();
final static Logger logger = LoggerFactory.getLogger("project");
static public long generateID() {
return System.currentTimeMillis() + Math.round(Math.random() * 1000000000000L);
}
public Project() {
this(generateID());
id = generateID();
history = new History(this);
}
protected Project(long id) {
@ -116,6 +117,10 @@ public class Project {
this._lastSave = LocalDateTime.now();
}
public ProjectMetadata getMetadata() {
return ProjectManager.singleton.getProjectMetadata(id);
}
public void saveToOutputStream(OutputStream out, Pool pool) throws IOException {
for (OverlayModel overlayModel : overlayModels.values()) {
try {
@ -240,14 +245,11 @@ public class Project {
columnModel.update();
recordModel.update(this);
}
//wrapper of processManager variable to allow unit testing
//TODO make the processManager variable private, and force all calls through this method
public ProcessManager getProcessManager() {
return this.processManager;
}
public ProjectMetadata getMetadata() {
return ProjectManager.singleton.getProjectMetadata(id);
}
}

View File

@ -62,21 +62,6 @@ public class ColumnAdditionChange extends ColumnChange {
newCells.toArray(_newCells);
}
public String getColumnName() {
return _columnName;
}
public int getColumnIndex() {
return _columnIndex;
}
public int getNewCellIndex() {
return _newCellIndex;
}
@Override
public void apply(Project project) {
synchronized (project) {

View File

@ -58,18 +58,6 @@ public class ColumnMoveChange extends ColumnChange {
_newColumnIndex = index;
}
public int getOldColumnIndex() {
return _oldColumnIndex;
}
public String getColumnName() {
return _columnName;
}
public int getNewColumnIndex() {
return _newColumnIndex;
}
@Override
public void apply(Project project) {
synchronized (project) {

View File

@ -54,15 +54,11 @@ public class ColumnRemovalChange extends ColumnChange {
protected Column _oldColumn;
protected CellAtRow[] _oldCells;
protected List<ColumnGroup> _oldColumnGroups;
public ColumnRemovalChange(int index) {
_oldColumnIndex = index;
}
public int getOldColumnIndex() {
return _oldColumnIndex;
}
@Override
public void apply(Project project) {
synchronized (project) {

View File

@ -57,11 +57,6 @@ public class ColumnReorderChange extends ColumnChange {
_columnNames = columnNames;
}
public List<String> getColumnNames() {
return _columnNames;
}
@Override
public void apply(Project project) {
synchronized (project) {

View File

@ -54,7 +54,7 @@ import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.util.Pool;
public class ColumnSplitChange extends ColumnChange {
public class ColumnSplitChange implements Change {
final protected String _columnName;
final protected List<String> _columnNames;
@ -118,21 +118,6 @@ public class ColumnSplitChange extends ColumnChange {
_newRows = newRows;
}
public List<String> getColumnNames() {
return _columnNames;
}
public boolean isRemoveOriginalColumn() {
return _removeOriginalColumn;
}
public int getColumnIndex() {
return _columnIndex;
}
@Override
public void apply(Project project) {
synchronized (project) {

View File

@ -1,59 +0,0 @@
package com.google.refine.model.metadata;
import java.io.File;
import java.io.IOException;
import java.time.LocalDateTime;
import org.apache.commons.beanutils.PropertyUtils;
import org.json.JSONObject;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.refine.util.ParsingUtilities;
public abstract class AbstractMetadata implements IMetadata {
@JsonIgnore
private MetadataFormat formatName = MetadataFormat.UNKNOWN;
@JsonIgnore
protected LocalDateTime written = null;
@JsonProperty("modified")
protected LocalDateTime _modified;
public MetadataFormat getFormatName() {
return formatName;
}
public void setFormatName(MetadataFormat formatName) {
this.formatName = formatName;
}
public void loadFromJSON(String obj) throws IOException {
ParsingUtilities.mapper.readerForUpdating(this).readValue(obj);
}
@Override
public abstract void loadFromFile(File metadataFile);
@Override
public boolean isDirty() {
return written == null || _modified.isAfter(written);
}
@Override
public LocalDateTime getModified() {
return _modified;
}
@Override
public void updateModified() {
_modified = LocalDateTime.now();
}
protected static boolean propertyExists(Object bean, String property) {
return PropertyUtils.isReadable(bean, property) &&
PropertyUtils.isWriteable(bean, property);
}
}

View File

@ -1,106 +0,0 @@
package com.google.refine.model.metadata;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.everit.json.schema.ValidationException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.annotation.JsonRawValue;
import com.fasterxml.jackson.annotation.JsonValue;
import io.frictionlessdata.datapackage.Package;
import io.frictionlessdata.datapackage.Resource;
import io.frictionlessdata.datapackage.exceptions.DataPackageException;
public class DataPackageMetadata extends AbstractMetadata {
private final static Logger logger = LoggerFactory.getLogger(DataPackageMetadata.class);
public static final String DEFAULT_FILE_NAME = "datapackage.json";
private Package _pkg;
@JsonValue
@JsonRawValue
public String getJson() {
return _pkg.getJson().toString();
}
public DataPackageMetadata() {
setFormatName(MetadataFormat.DATAPACKAGE_METADATA);
_pkg = PackageExtension.buildPackageFromTemplate();
}
@Override
public void loadFromJSON(JSONObject obj) {
try {
_pkg = new Package(obj);
} catch (ValidationException | DataPackageException | IOException e) {
logger.error("Load from JSONObject failed" + obj.toString(4),
ExceptionUtils.getStackTrace(e));
}
logger.info("Data Package metadata loaded");
}
@Override
public void loadFromFile(File metadataFile) {
String jsonString = null;
try {
jsonString = FileUtils.readFileToString(metadataFile);
} catch (IOException e) {
logger.error("Load data package failed when reading from file: " + metadataFile.getAbsolutePath(),
ExceptionUtils.getStackTrace(e));
}
loadFromJSON(new JSONObject(jsonString));
}
@Override
public void loadFromStream(InputStream inputStream) {
try {
this._pkg = new Package(IOUtils.toString(inputStream));
} catch (ValidationException e) {
logger.error("validation failed", ExceptionUtils.getStackTrace(e));
} catch (DataPackageException e) {
logger.error("Data package excpetion when loading from stream", ExceptionUtils.getStackTrace(e));
} catch (IOException e) {
logger.error("IO exception when loading from stream", ExceptionUtils.getStackTrace(e));
}
}
public List<String> getResourcePaths() {
List<String> listResources = new ArrayList<String>();
for (Resource resource : _pkg.getResources()) {
listResources.add((String) resource.getPath());
}
return listResources;
}
public Package getPackage() {
return _pkg;
}
@Override
public List<Exception> validate() {
try {
_pkg.validate();
} catch (ValidationException | IOException | DataPackageException e) {
logger.error("validate json failed", ExceptionUtils.getStackTrace(e));
}
return _pkg.getErrors();
}
}

View File

@ -1,44 +0,0 @@
package com.google.refine.model.metadata;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import com.google.refine.importing.UrlRewriter;
public class DataPackageUrlRewriter implements UrlRewriter {
@Override
public List<Result> rewrite(String url) throws MalformedURLException, IOException {
List<Result> listResult = new ArrayList<Result>();
if (!filter(url))
return listResult;
listResult.add(new Result(url, "json", true, MetadataFormat.DATAPACKAGE_METADATA.name()));
DataPackageMetadata meta = new DataPackageMetadata();
meta.loadFromStream(new URL(url).openStream());
// Import the data files.
for (String path : meta.getResourcePaths()) {
String fileURL = getBaseURL(url) + "/" + path;
listResult.add(new Result(fileURL,
"", // leave to guesser. "text/line-based/*sv"
true));
}
return listResult;
}
@Override
public boolean filter(String url) {
return url.endsWith(DataPackageMetadata.DEFAULT_FILE_NAME);
}
private String getBaseURL(String url) {
return url.replaceFirst(DataPackageMetadata.DEFAULT_FILE_NAME, "");
}
}

View File

@ -1,29 +0,0 @@
package com.google.refine.model.metadata;
import java.io.File;
import java.io.InputStream;
import java.time.LocalDateTime;
import java.util.List;
import org.json.JSONObject;
/**
* Interface to import/export metadata
*/
public interface IMetadata {
public void loadFromFile(File metadataFile);
public void loadFromStream(InputStream inputStream);
public MetadataFormat getFormatName();
public void setFormatName(MetadataFormat format);
public LocalDateTime getModified();
public void updateModified();
public boolean isDirty();
public List<Exception> validate();
}

View File

@ -1,82 +0,0 @@
package com.google.refine.model.metadata;
import java.io.IOException;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.everit.json.schema.ValidationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.model.Project;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
import io.frictionlessdata.datapackage.Package;
import io.frictionlessdata.datapackage.Resource;
import io.frictionlessdata.datapackage.exceptions.DataPackageException;
public class MetadataFactory {
    private final static Logger logger = LoggerFactory.getLogger(MetadataFactory.class);
    
    /**
     * Instantiate the metadata implementation matching the given format.
     *
     * @param format the requested metadata format
     * @return a fresh metadata object, or null when the format has no implementation
     */
    public static IMetadata buildMetadata(MetadataFormat format) {
        if (MetadataFormat.PROJECT_METADATA == format) {
            return new ProjectMetadata();
        }
        if (MetadataFormat.DATAPACKAGE_METADATA == format) {
            return new DataPackageMetadata();
        }
        return null;
    }
    
    /**
     * Build an empty Data Package metadata object.
     *
     * @return a fresh, empty DataPackageMetadata
     */
    public static DataPackageMetadata buildDataPackageMetadata() {
        return (DataPackageMetadata) buildMetadata(MetadataFormat.DATAPACKAGE_METADATA);
    }
    
    /**
     * Build an empty data package metadata, then populate its fields from the
     * given project's ProjectMetadata.
     *
     * @param project source project whose metadata and column model are copied
     * @return the populated DataPackageMetadata (fields may be incomplete if
     *         the underlying package library rejects a value — errors are logged)
     */
    public static DataPackageMetadata buildDataPackageMetadata(Project project) {
        DataPackageMetadata dataPackage = buildDataPackageMetadata();
        ProjectMetadata projectMeta = project.getMetadata();
        Package pkg = dataPackage.getPackage();
        // Derive the single resource (schema + path) from the project's column model.
        Resource resource = SchemaExtension.createResource(projectMeta.getName(),
                project.columnModel);
        try {
            pkg.addResource(resource);
            
            putValue(pkg, Package.JSON_KEY_NAME, projectMeta.getName());
            putValue(pkg, PackageExtension.JSON_KEY_LAST_UPDATED, ParsingUtilities.localDateToString(projectMeta.getModified()));
            putValue(pkg, PackageExtension.JSON_KEY_DESCRIPTION, projectMeta.getDescription());
            putValue(pkg, PackageExtension.JSON_KEY_TITLE, projectMeta.getTitle());
            putValue(pkg, PackageExtension.JSON_KEY_HOMEPAGE, projectMeta.getHomepage());
            putValue(pkg, PackageExtension.JSON_KEY_IMAGE, projectMeta.getImage());
            putValue(pkg, PackageExtension.JSON_KEY_LICENSE, projectMeta.getLicense());
            // Tags map onto the data package "keywords" array; replace wholesale.
            pkg.removeProperty(PackageExtension.JSON_KEY_KEYWORKS);
            pkg.addProperty(PackageExtension.JSON_KEY_KEYWORKS, JSONUtilities.arrayToJSONArray(projectMeta.getTags()));
        } catch (ValidationException | IOException | DataPackageException e) {
            logger.error(ExceptionUtils.getStackTrace(e));
        }
        
        return dataPackage;
    }

    /** Overwrite (remove, then add) a scalar property on the package JSON. */
    private static void putValue(Package pkg, String key, String value) throws DataPackageException {
        boolean alreadyPresent = pkg.getJson().has(key);
        if (alreadyPresent) {
            pkg.removeProperty(key);
        }
        pkg.addProperty(key, value);
    }
}

View File

@ -1,24 +0,0 @@
package com.google.refine.model.metadata;
/**
* A list of supported metadata format
*
*/
public enum MetadataFormat {
    UNKNOWN("UNKNOWN"),
    PROJECT_METADATA("PROJECT_METADATA"),
    DATAPACKAGE_METADATA("DATAPACKAGE_METADATA"),
    CSVW_METADATA("CSVW_METADATA");
    
    // Textual label; mirrors the constant name for every current member.
    private final String label;
    
    private MetadataFormat(final String label) {
        this.label = label;
    }

    /** @return the textual form of this format */
    @Override
    public String toString() {
        return label;
    }
}

View File

@ -1,88 +0,0 @@
package com.google.refine.model.metadata;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.everit.json.schema.ValidationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import io.frictionlessdata.datapackage.Package;
import io.frictionlessdata.datapackage.exceptions.DataPackageException;
/**
* This class contains some methods which is not included in the official "Data Package" repo for now.
* Some methods can be removed after the official library provide the corresponding function.
*/
public class PackageExtension {
    private final static Logger logger = LoggerFactory.getLogger(PackageExtension.class);
    
    private static final int JSON_INDENT_FACTOR = 4;
    
    // JSON keys used to extend the data package descriptor beyond the base spec.
    public static final String JSON_KEY_LAST_UPDATED = "last_updated";
    public static final String JSON_KEY_DESCRIPTION = "description";
    // NOTE(review): constant name is a typo for "KEYWORDS", but it is public
    // and referenced elsewhere (MetadataFactory) — renaming would break callers.
    public static final String JSON_KEY_KEYWORKS = "keywords";
    public static final String JSON_KEY_TITLE = "title";
    public static final String JSON_KEY_HOMEPAGE = "homepage";
    public static final String JSON_KEY_IMAGE = "image";
    public static final String JSON_KEY_LICENSE = "license";
    public static final String JSON_KEY_VERSION = "version";
    
    public static String DATAPACKAGE_TEMPLATE_FILE = "schemas/datapackage-template.json";
    
    /**
     * Do the package since the final spec for the compression/bundle are not settled yet.
     * https://github.com/frictionlessdata/datapackage-js/issues/93
     *
     * Writes a zip containing the descriptor JSON plus a single data file
     * (only one data file can be bundled because files cannot be restored).
     *
     * @param pkg Package whose descriptor is written as the first zip entry
     * @param dataByteArrayOutputStream buffered table data for the single resource
     * @param destOs destination stream; wrapped in a ZipOutputStream and closed
     * @throws IOException on any write failure
     * @throws FileNotFoundException kept for signature compatibility
     * @see Package#saveZip(String outputFilePath)
     */
    public static void saveZip(Package pkg, final ByteArrayOutputStream dataByteArrayOutputStream, final OutputStream destOs) throws FileNotFoundException, IOException {
        try(ZipOutputStream zos = new ZipOutputStream(destOs)){
            // descriptor entry ("datapackage.json")
            ZipEntry entry = new ZipEntry(DataPackageMetadata.DEFAULT_FILE_NAME);
            zos.putNextEntry(entry);
            // NOTE(review): getBytes() uses the platform default charset — confirm UTF-8 is intended.
            zos.write(pkg.getJson().toString(JSON_INDENT_FACTOR).getBytes());
            zos.closeEntry();
            // data entry: default to data.csv when the resource has no path
            String path = (String) pkg.getResources().get(0).getPath();
            entry = new ZipEntry(StringUtils.isBlank(path) ? "data.csv" : path);
            zos.putNextEntry(entry);
            zos.write(dataByteArrayOutputStream.toByteArray());
            zos.closeEntry();
        }
    }
    
    /**
     * Build a Package object from the bundled template file containing empty metadata.
     *
     * @return the template-backed Package, or null when the template cannot be
     *         read or parsed (the failure is logged)
     */
    public static Package buildPackageFromTemplate() {
        ClassLoader classLoader = PackageExtension.class.getClassLoader();
        // try-with-resources: the original leaked this stream.
        try (InputStream inputStream = classLoader.getResourceAsStream(DATAPACKAGE_TEMPLATE_FILE)) {
            return new Package(IOUtils.toString(inputStream), false);
        } catch (ValidationException e) {
            // Pass the Throwable itself so SLF4J logs the stack trace; the
            // original passed the trace as an unused format argument and lost it.
            logger.error("validation failed", e);
        } catch (DataPackageException e) {
            logger.error("DataPackage Exception", e);
        } catch (IOException e) {
            logger.error("IOException when build package from template", e);
        }
        return null;
    }
}

View File

@ -1,483 +0,0 @@
/*
Copyright 2010, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.model.metadata;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONTokener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonRawValue;
import com.fasterxml.jackson.annotation.JsonUnwrapped;
import com.fasterxml.jackson.annotation.JsonView;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectManager;
import com.google.refine.preference.PreferenceStore;
import com.google.refine.preference.TopList;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.JsonViews;
import com.google.refine.util.ParsingUtilities;
/**
 * Metadata of an OpenRefine project: identity (name, creator, description),
 * import options, user-defined key/value metadata, per-project preferences,
 * and the data-package-style fields (title, homepage, image, license, version).
 * Serialized with Jackson; fields annotated with {@code JsonViews.SaveMode}
 * are only written when persisting to disk.
 */
public class ProjectMetadata extends AbstractMetadata {
    // File names used by the on-disk project store.
    final public static String DEFAULT_FILE_NAME = "metadata.json";
    final public static String TEMP_FILE_NAME = "metadata.temp.json";
    final public static String OLD_FILE_NAME = "metadata.old.json";
    
    @JsonProperty("created")
    private LocalDateTime _created;

    @JsonProperty("name")
    private String _name = "";
    @JsonProperty("password")
    @JsonView(JsonViews.SaveMode.class)
    private String _password = "";

    @JsonProperty("encoding")
    @JsonView(JsonViews.SaveMode.class)
    private String _encoding = "";
    @JsonProperty("encodingConfidence")
    @JsonView(JsonViews.SaveMode.class)
    private int _encodingConfidence;
    @JsonProperty("rowCount")
    private int _rowCount;
    // user metadata
    @JsonIgnore
    private ArrayNode _userMetadata = ParsingUtilities.mapper.createArrayNode();
    
    // _tags maps to keywords of the data package metadata
    @JsonProperty("tags")
    private String[] _tags = new String[0];
    @JsonProperty("creator")
    private String _creator = "";
    @JsonProperty("contributors")
    private String _contributors = "";
    @JsonProperty("subject")
    private String _subject = "";    // Several refine projects may be linked
    @JsonProperty("description")
    private String _description = "";                // free form of comment
    
    // import options is an array for 1-n data sources
    @JsonIgnore
    private ArrayNode _importOptionMetadata = ParsingUtilities.mapper.createArrayNode();
    
    @JsonProperty("customMetadata")
    private Map<String, Serializable>   _customMetadata = new HashMap<String, Serializable>();
    @JsonProperty("preferences")
    @JsonView(JsonViews.SaveMode.class)
    private PreferenceStore             _preferenceStore = new PreferenceStore();
    
    // below 5 fields are from data package metadata:
    @JsonProperty("title")
    private String title = "";
    @JsonProperty("homepage")
    private String homepage;
    @JsonProperty("image")
    private String image = "";
    @JsonProperty("license")
    private String license = "";
    @JsonProperty("version")
    private String version = "";
    
    /** User metadata for serialization; null when empty so the key is omitted. */
    @JsonProperty(PreferenceStore.USER_METADATA_KEY)
    @JsonInclude(Include.NON_NULL)
    public ArrayNode getJsonUserMetadata() {
        if (_userMetadata.size() > 0) {
            return _userMetadata;
        }
        return null;
    }
    
    @JsonProperty(PreferenceStore.USER_METADATA_KEY)
    protected void setUserMetadataJson(ArrayNode json) {
        _userMetadata = json;
    }
    
    /** Import-option metadata for serialization; null when empty so the key is omitted. */
    @JsonProperty("importOptionMetadata")
    @JsonInclude(Include.NON_NULL)
    public ArrayNode getJsonImportOptionMetadata() {
        if (_importOptionMetadata.size() > 0) {
            return _importOptionMetadata;
        }
        return null;
    }
    
    @JsonProperty("importOptionMetadata")
    public void setImportOptionMetadataJson(ArrayNode options) {
        _importOptionMetadata = options;
        // this field should always be present so we can update the last written time here
        this.written = LocalDateTime.now();
    }
    
    // backwards compatibility with the old "expressions" top-level key
    @JsonProperty("expressions")
    protected void setExpressions(TopList expressions) {
        this._preferenceStore.put("scripting.expressions", expressions);
    }

    private final static Logger logger = LoggerFactory.getLogger("project_metadata");

    protected ProjectMetadata(LocalDateTime date) {
        setFormatName(MetadataFormat.PROJECT_METADATA);
        _created = date;
        preparePreferenceStore(_preferenceStore);
    }

    /** Create metadata timestamped "now", with modified == created. */
    public ProjectMetadata() {
        this(LocalDateTime.now());
        _modified = _created;
    }

    public ProjectMetadata(LocalDateTime created, LocalDateTime modified, String name) {
        this(created);
        _modified = modified;
        _name = name;
    }
    
    public void setRowCount(int rowCount) {
        this._rowCount = rowCount;
        updateModified();
    }

    /**
     * Serialization hook: records the save time as a side effect of writing
     * in SaveMode. Always returns null so nothing is emitted for the key.
     */
    @JsonProperty("saveModeWritten")
    @JsonView(JsonViews.SaveMode.class)
    @JsonInclude(Include.NON_NULL)
    public String setSaveModeWritten() {
        written = LocalDateTime.now();
        return null;
    }

    static protected void preparePreferenceStore(PreferenceStore ps) {
        ProjectManager.preparePreferenceStore(ps);
        // Any project specific preferences?
    }

    public LocalDateTime getCreated() {
        return _created;
    }

    public void setEncodingConfidence(int confidence) {
        this._encodingConfidence = confidence;
        updateModified();
    }

    /** String overload for form input; null is ignored. May throw NumberFormatException. */
    public void setEncodingConfidence(String confidence) {
        if (confidence != null) {
            this.setEncodingConfidence(Integer.parseInt(confidence));
        }
    }

    public int getEncodingConfidence() {
        return _encodingConfidence;
    }
    
    /** Store tags, trimming whitespace and discarding null/empty entries. */
    public void setTags(String[] tags) {
        if (tags != null) {
            List<String> tmpTags = new ArrayList<String>(tags.length);
            for (String tag : tags) {
                if (tag != null) {
                    String trimmedTag = tag.trim();

                    if (!trimmedTag.isEmpty()) {
                        tmpTags.add(trimmedTag);
                    }
                }
            }
            this._tags = tmpTags.toArray(new String[tmpTags.size()]);
        } else {
            this._tags = tags;
        }

        updateModified();
    }

    public void appendTags(String[] tags) {
        String[] mergedTags = (String[])ArrayUtils.addAll(this._tags, tags);
        setTags(mergedTags);
    }

    /** Never returns null; lazily normalizes a null field to an empty array. */
    public String[] getTags() {
        if (_tags == null) this._tags = new String[0];
        return _tags;
    }

    public void setPassword(String password) {
        this._password = password;
        updateModified();
    }

    public String getPassword() {
        return _password;
    }

    public LocalDateTime getModified() {
        return _modified;
    }

    public void updateModified() {
        _modified = LocalDateTime.now();
    }

    public PreferenceStore getPreferenceStore() {
        return _preferenceStore;
    }

    public Serializable getCustomMetadata(String key) {
        return _customMetadata.get(key);
    }

    /** Set (or, when value is null, remove) a custom metadata entry. */
    public void setCustomMetadata(String key, Serializable value) {
        if (value == null) {
            _customMetadata.remove(key);
        } else {
            _customMetadata.put(key, value);
        }
        updateModified();
    }

    @JsonIgnore
    public ArrayNode getImportOptionMetadata() {
        return _importOptionMetadata;
    }

    @JsonIgnore
    public void setImportOptionMetadata(ArrayNode jsonArray) {
        _importOptionMetadata = jsonArray;
        updateModified();
    }

    public void appendImportOptionMetadata(ObjectNode options) {
        _importOptionMetadata.add(options);
        updateModified();
    }

    public String getEncoding() {
        return _encoding;
    }

    public void setName(String name) {
        this._name = name;
        updateModified();
    }

    public String getName() {
        return _name;
    }

    public void setEncoding(String encoding) {
        this._encoding = encoding;
        updateModified();
    }

    public String getCreator() {
        return _creator;
    }

    public void setCreator(String creator) {
        this._creator = creator;
        updateModified();
    }

    public String getContributors() {
        return _contributors;
    }

    public void setContributors(String contributors) {
        this._contributors = contributors;
        updateModified();
    }

    public String getSubject() {
        return _subject;
    }

    public void setSubject(String subject) {
        this._subject = subject;
        updateModified();
    }

    public String getDescription() {
        return _description;
    }

    public void setDescription(String description) {
        this._description = description;
        updateModified();
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
        updateModified();
    }

    public String getHomepage() {
        return homepage;
    }

    public void setHomepage(String homepage) {
        this.homepage = homepage;
        updateModified();
    }

    public String getImage() {
        return image;
    }

    public void setImage(String image) {
        this.image = image;
        updateModified();
    }

    public String getLicense() {
        return license;
    }

    public void setLicense(String license) {
        this.license = license;
        updateModified();
    }

    public String getVersion() {
        return version;
    }

    public void setVersion(String version) {
        this.version = version;
        updateModified();
    }
    
    public ArrayNode getUserMetadata() {
        return _userMetadata;
    }

    public void setUserMetadata(ArrayNode userMetadata) {
        this._userMetadata = userMetadata;
    }

    /** Update the "value" of the user-metadata entry whose "name" matches metaName. */
    private void updateUserMetadata(String metaName, String valueString)  {
        for (int i = 0; i < _userMetadata.size(); i++) {
            try {
                JsonNode obj = _userMetadata.get(i);
                if (obj.get("name").asText("").equals(metaName)) {
                    ((ObjectNode) obj).put("value", valueString);
                }
            } catch (JSONException e) {
                logger.error(ExceptionUtils.getStackTrace(e));
            }
        }
    }

    /**
     * Set a bean property by name; unknown names fall through to user metadata.
     * "tags" is special-cased: the value is split on commas into an array.
     */
    public void setAnyStringField(String metaName, String valueString)  {
        if (propertyExists(this, metaName)) {
            try {
                if (metaName.equals("tags")) {
                    BeanUtils.setProperty(this, metaName, valueString.split(","));
                } else 
                    BeanUtils.setProperty(this, metaName, valueString);
            } catch (IllegalAccessException | InvocationTargetException ite) {
                logger.error(ExceptionUtils.getStackTrace(ite));
            }
        } else {
            updateUserMetadata(metaName, valueString);
        }
    }
    
    /**
     * Load metadata from a JSON file. Read failures are logged and leave the
     * object unchanged (the original forwarded a null stream and would NPE).
     */
    @Override
    public void loadFromFile(File metadataFile) {
        InputStream targetStream = null;
        try {
            targetStream = FileUtils.openInputStream(metadataFile);
        } catch (IOException e) {
            logger.error(ExceptionUtils.getStackTrace(e));
            return; // nothing to load
        }
        loadFromStream(targetStream);
    }
    
    /**
     * Load metadata from a JSON stream.
     * Fixed: the original first drained the stream through a JSONTokener and
     * then called IOUtils.toString on the already-consumed stream, so
     * loadFromJSON received an empty string. The stream is now read exactly once.
     */
    @Override
    public void loadFromStream(InputStream inputStream) {
        try (InputStreamReader reader = new InputStreamReader(inputStream)) {
            this.loadFromJSON(IOUtils.toString(reader));
        } catch (IOException e) {
            logger.error(ExceptionUtils.getStackTrace(e));
        }
    }

    /**
     * No-op validation.
     * NOTE(review): returns null rather than an empty list — callers appear to
     * expect that; kept for compatibility.
     */
    @Override
    public List<Exception> validate() {
        return null;
    }
}

View File

@ -1,64 +0,0 @@
package com.google.refine.model.metadata;
import com.google.refine.model.Column;
import com.google.refine.model.ColumnModel;
import io.frictionlessdata.datapackage.Resource;
import io.frictionlessdata.tableschema.Field;
import io.frictionlessdata.tableschema.Schema;
/**
* This class contains some methods which is not included in the official "table schema" repo for now.
* Some methods can be removed after the official library provide the corresponding function.
*/
/**
 * Helpers not (yet) provided by the official "table schema" library.
 * Methods can be removed once the upstream library offers equivalents.
 */
public class SchemaExtension {
    private static final String DEFAULT_RESOURCE_PATH = "data/";
    private static final String DEFAULT_RESOURCE_SUFFIX = ".csv";
    
    /**
     * Insert a field into the schema at the given position.
     *
     * @param schema   schema to mutate
     * @param field    field to insert
     * @param position zero-based insertion index
     */
    public static void insertField(Schema schema, Field field, int position) {
        schema.getFields().add(position, field);
    }
    
    /**
     * Remove and return the field at the given position.
     *
     * @param schema schema to mutate
     * @param index  zero-based index of the field to drop
     * @return the removed field
     */
    public static Field removeField(Schema schema, int index) {
        return schema.getFields().remove(index);
    }
    
    /**
     * Create a resource by name, deriving its schema from the ColumnModel.
     *
     * @param resourceName logical name; also used to build the default path
     * @param columnModel  source of field names/types/constraints
     * @return a resource located at "data/&lt;resourceName&gt;.csv"
     * @see ColumnModel
     */
    public static Resource createResource(String resourceName, ColumnModel columnModel) {
        // Mirror the OpenRefine column model into a table schema, one field per column.
        Schema schema = new Schema();
        for (int i = 0; i < columnModel.columns.size(); i++) {
            Column column = columnModel.columns.get(i);
            Field field = new Field(column.getName(),
                    column.getType(),
                    column.getFormat(),
                    column.getTitle(),
                    column.getDescription(),
                    column.getConstraints());
            schema.addField(field);
        }
        
        String resourcePath = DEFAULT_RESOURCE_PATH + resourceName + DEFAULT_RESOURCE_SUFFIX;
        return new Resource(resourceName, resourcePath, schema.getJson());
    }
}

View File

@ -1,21 +0,0 @@
package com.google.refine.model.metadata.validator;
import org.json.JSONObject;
import com.google.refine.model.AbstractOperation;
import com.google.refine.model.Project;
public class ValidateOperation extends AbstractOperation {
    // Project whose columns will be validated.
    private Project project;
    // Validation options; ValidatorInspector expects a "columnNames" array inside.
    private JSONObject options;
    
    public ValidateOperation(Project project, JSONObject options) {
        this.project = project;
        this.options = options;
    }
    
    /** Run the inspection synchronously and return the validation report JSON. */
    public JSONObject startProcess() {
       return ValidatorInspector.inspect(project, options);
    }
}

View File

@ -1,6 +0,0 @@
package com.google.refine.model.metadata.validator;
// NOTE(review): empty placeholder — presumably intended to hold validator
// configuration; nothing references or populates it yet.
public class ValidatorConfig {
}

View File

@ -1,6 +0,0 @@
package com.google.refine.model.metadata.validator;
// NOTE(review): empty placeholder — presumably intended for validator-specific
// exception types; nothing references it yet.
public class ValidatorExceptions {
}

View File

@ -1,102 +0,0 @@
package com.google.refine.model.metadata.validator;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.metadata.validator.checks.TypeorFormatError;
import com.google.refine.model.metadata.validator.checks.Validator;
import com.google.refine.util.JSONUtilities;
public class ValidatorInspector {
    private final static Logger logger = LoggerFactory.getLogger(ValidatorInspector.class);
    
    /**
     * Run all applicable validators against the requested columns and collect
     * a report.
     *
     * @param project the project whose rows are inspected
     * @param options validation options; must contain a "columnNames" JSON array
     * @return a JSON object with a "validation-reports" array of findings
     */
    public static JSONObject inspect(Project project, JSONObject options) {
        List<String> columnNames;
        String COLUMN_NAMES_KEY = "columnNames";
        Map<String, List<Validator>> columnToCheckersMap = new HashMap<String, List<Validator>>();
        JSONArray validateReport = new JSONArray();
        
        logger.info("starting inspect with options: {}", options);
        columnNames = JSONUtilities.toStringList(options.getJSONArray(COLUMN_NAMES_KEY));
        
        // Build the check items for each column.
        List<Validator> validatorList = null;
        for(String columnName : columnNames) {
            validatorList = compileChecks(project, columnName, options);
            // Fixed: the original tested "size() >= 0", which is always true.
            if (!validatorList.isEmpty())
                columnToCheckersMap.put(columnName, validatorList);
        }
        
        logger.info("==========================================================");
        logger.info("Inspector finished the checks compile. will do following check:");
        for (Entry<String, List<Validator>> entry : columnToCheckersMap.entrySet()) {
            logger.info("Column Name: {}", entry.getKey());
            for (Validator v : entry.getValue()) {
                logger.info("\t Validator: {}", v.getClass().getSimpleName());
            }
        }
        logger.info("==========================================================");
        
        // Run the checks in a second pass so the plan above is logged first.
        for(String columnName : columnNames) {
            List<Validator> validators = columnToCheckersMap.get(columnName);
            if (validators != null) {
                for (Validator validator : validators) {
                    JSONArray result = validator.validate();
                    if (result != null && result.length() > 0)
                        JSONUtilities.concatArray(validateReport, result);
                }
            }
        }
        logger.info("Inspector finished the validation.");
        
        return new JSONObject().put("validation-reports", (Object)validateReport);
    }
    
    /**
     * Build the validator list for one column: a type/format check always, plus
     * one constraint validator per entry in the column's constraint map, looked
     * up in the ValidatorRegistry by reflection.
     */
    private static List<Validator> compileChecks(Project project, String columnName, JSONObject options) {
        Map<String, Class> constraintHandlersMap = ValidatorRegistry.getInstance().getConstraintHandlersMap();
        
        Column column = project.columnModel.getColumnByName(columnName);
        List<Validator> validatorList = new ArrayList<Validator>();
        int columnIndex = project.columnModel.getColumnIndexByName(columnName);
        
        // Type/format validation applies to every column.
        validatorList.add(new TypeorFormatError(project, columnIndex, options));
        if (column.getConstraints() != null) {
            for (Entry<String, Object> entry : column.getConstraints().entrySet()) {
                Class<Validator> clazz = constraintHandlersMap.get(entry.getKey());
                try {
                    // Every constraint validator exposes a (Project, int, JSONObject) constructor.
                    Constructor<Validator> c = clazz.getConstructor(Project.class, int.class, JSONObject.class);
                    validatorList.add(c.newInstance(project, columnIndex, options));
                } catch (InstantiationException | IllegalAccessException | IllegalArgumentException
                        | InvocationTargetException | NoSuchMethodException | SecurityException e) {
                    logger.error("failed to do compileChecks:" + ExceptionUtils.getStackTrace(e));
                }
            }
        }
        
        return validatorList;
    }
}

View File

@ -1,41 +0,0 @@
package com.google.refine.model.metadata.validator;
import java.util.HashMap;
import java.util.Map;
import com.google.refine.model.metadata.validator.checks.EnumerableConstraint;
import com.google.refine.model.metadata.validator.checks.MaximumConstraint;
import com.google.refine.model.metadata.validator.checks.MaximumLengthConstraint;
import com.google.refine.model.metadata.validator.checks.MinimumConstraint;
import com.google.refine.model.metadata.validator.checks.MinimumLengthConstraint;
import com.google.refine.model.metadata.validator.checks.PatternConstraint;
import com.google.refine.model.metadata.validator.checks.RequiredConstraint;
import io.frictionlessdata.tableschema.Field;
/**
 * Singleton registry mapping table-schema constraint keys to the Validator
 * implementation that checks them.
 */
public class ValidatorRegistry {
    private static ValidatorRegistry instance = null;
    
    // Raw Class kept for compatibility with getConstraintHandlersMap() callers.
    private Map<String, Class> constraintHandlersMap = null;
    
    private ValidatorRegistry() {
        constraintHandlersMap = new HashMap<String, Class>();
        constraintHandlersMap.put(Field.CONSTRAINT_KEY_ENUM,EnumerableConstraint.class);
        constraintHandlersMap.put(Field.CONSTRAINT_KEY_MAXIMUM, MaximumConstraint.class);
        constraintHandlersMap.put(Field.CONSTRAINT_KEY_MAX_LENGTH, MaximumLengthConstraint.class);
        constraintHandlersMap.put(Field.CONSTRAINT_KEY_MINIMUM, MinimumConstraint.class);
        constraintHandlersMap.put(Field.CONSTRAINT_KEY_MIN_LENGTH, MinimumLengthConstraint.class);
        constraintHandlersMap.put(Field.CONSTRAINT_KEY_PATTERN, PatternConstraint.class);
        constraintHandlersMap.put(Field.CONSTRAINT_KEY_REQUIRED, RequiredConstraint.class);
    }
    
    /**
     * @return the singleton instance. Synchronized: the original unsynchronized
     *         lazy init could create two instances under concurrent first access.
     */
    public static synchronized ValidatorRegistry getInstance() {
        if (instance == null)
            instance = new ValidatorRegistry();
        
        return instance;
    }

    /** @return the constraint-key → validator-class map */
    public Map<String, Class> getConstraintHandlersMap() {
        return constraintHandlersMap;
    }
}

View File

@ -1,28 +0,0 @@
package com.google.refine.model.metadata.validator;
import java.util.Locale;
import java.util.ResourceBundle;
/**
 * Singleton lookup for validator error-message templates, backed by the
 * "validator-resource-bundle" resource bundle (en_US locale).
 */
public class ValidatorSpec {
    private static final String VALIDATOR_RESOURCE_BUNDLE = "validator-resource-bundle";
    private static ValidatorSpec instance = null;
    private final ResourceBundle bundle;
    
    private ValidatorSpec() {
        Locale locale = new Locale("en", "US");
        bundle = ResourceBundle.getBundle(VALIDATOR_RESOURCE_BUNDLE, locale);
    }
    
    /**
     * @return the singleton instance. Synchronized: the original unsynchronized
     *         lazy init could create two instances under concurrent first access.
     */
    public static synchronized ValidatorSpec getInstance() {
        if (instance == null)
            instance = new ValidatorSpec();
        
        return instance;
    }
    
    /** @return the message template registered under the given code */
    public String getMessage(String code) {
        return bundle.getString(code);
    }
}

View File

@ -1,122 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang3.text.StrSubstitutor;
import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.refine.model.Cell;
import com.google.refine.model.Column;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import com.google.refine.model.metadata.validator.ValidatorSpec;
/**
 * Base class for per-column validators: iterates every row of a project,
 * filters out empty cells, asks {@link #checkCell(Cell)} whether each remaining
 * cell is valid, and accumulates a JSON error entry for each failure.
 */
public abstract class AbstractValidator implements Validator {
    protected final Logger logger = LoggerFactory.getLogger(this.getClass());
    
    protected Project project;
    // Cell index of the validated column within each row.
    protected int cellIndex;
    protected JSONObject options;
    protected Column column;
    // Error code for this validator (e.g. "maximum-constraint"); set by subclasses.
    protected String code;
    // Accumulated error objects; lazily created. NOTE(review): name is a typo
    // for "jsonErrors" but it is protected — renaming could break subclasses.
    protected JSONArray jsonErros = null;
    // Named substitution values for message templates; see format().
    protected Map<String, String> lookup = new HashMap<String, String>(6);
    
    /**
     * Constructor
     * @param project project whose rows are validated
     * @param cellIndex cell index of the column to validate
     * @param options validation options passed through from the caller
     */
    public AbstractValidator(Project project, int cellIndex, JSONObject options) {
        this.project = project;
        this.cellIndex = cellIndex;
        this.options = options;
        this.column = project.columnModel.getColumnByCellIndex(cellIndex);
    }
    
    /**
     * Walk every row; skip filtered cells; record an error (1-based row number)
     * for each cell that fails checkCell. Returns null when no errors occurred.
     */
    @Override
    public JSONArray validate() {
        for (int rowIndex = 0;rowIndex < project.rows.size();rowIndex++) {
            Row row = project.rows.get(rowIndex);
            Cell cell = row.getCell(cellIndex);
            if (filter(cell))
                continue;
            
            boolean checkResult = checkCell(cell);
            if (!checkResult) {
                addError(formatErrorMessage(cell, rowIndex + 1));
            }
        }
        
        return jsonErros;
    }
    
    /**
     * Build one error entry: code, substituted message, row and column numbers.
     * Assumes cell.value is non-null (filter() has already excluded null values).
     */
    @Override
    public JSONObject formatErrorMessage(Cell cell, int rowIndex) {
        String message = null;
        message = ValidatorSpec.getInstance().getMessage(code);
        String formattedMessage = format(message, cell.value.toString(), rowIndex, cellIndex, code);
        JSONObject json = new JSONObject();
        json.put("code", code);
        json.put("message", formattedMessage);
        json.put("row-number", rowIndex);
        json.put("column-number", cellIndex);
        return json;
    }
    
    /**
     * MessageFormat.format cannot take the named parameters.
     * Fills the shared lookup map (note: parameters shadow the fields of the
     * same names) and substitutes ${value}, ${row_number}, ${column_number},
     * ${constraint} into the template via StrSubstitutor.
     * @param message template with ${...} placeholders
     * @param value cell value as text
     * @param rowIndex 1-based row number
     * @param cellIndex cell/column index
     * @param code validator error code
     * @return the substituted message
     */
    private String format(String message, String value, int rowIndex, int cellIndex, String code) {
        lookup.put("value", value);
        lookup.put("row_number", Integer.toString(rowIndex));
        lookup.put("column_number", Integer.toString(cellIndex));
        lookup.put("constraint", code);
        customizedFormat();
        
        return new StrSubstitutor(lookup).replace(message);
    }

    /*
     * Empty body since default there is no customized Format; subclasses may
     * add extra lookup entries before substitution.
     * @see com.google.refine.model.metadata.validator.checks.Validator#customizedFormat()
     */
    @Override
    public void customizedFormat() {
    }
    
    /**
     * will skip the cell if return true (null cells and null values are skipped)
     */
    @Override
    public boolean filter(Cell cell) {
        return cell == null || cell.value == null;
    }
    
    /**
     * Default check always fails. NOTE(review): a subclass that does not
     * override this will therefore report an error for every non-empty cell —
     * confirm that placeholder subclasses are never registered.
     */
    @Override
    public boolean checkCell(Cell cell) {
        return false;
    }
    
    /** Lazily create the error array and append one error entry. */
    @Override
    public void addError(JSONObject result) {
        if (jsonErros == null)
            jsonErros = new JSONArray();
        
        jsonErros.put(result);
    }
}

View File

@ -1,11 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Project;
// NOTE(review): placeholder — does not override checkCell, so it inherits the
// always-false default; it is not wired into ValidatorRegistry.
public class BlankHeader extends AbstractValidator {
    public BlankHeader(Project project, int cellIndex, JSONObject options) {
        super(project, cellIndex, options);
    }
}

View File

@ -1,12 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Project;
// NOTE(review): placeholder — does not override checkCell, so it inherits the
// always-false default; it is not wired into ValidatorRegistry.
public class BlankRow extends AbstractValidator {
    public BlankRow(Project project, int cellIndex, JSONObject options) {
        super(project, cellIndex, options);
    }
}

View File

@ -1,11 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Project;
// NOTE(review): placeholder — does not override checkCell, so it inherits the
// always-false default; it is not wired into ValidatorRegistry.
public class DuplicateHeader extends AbstractValidator {
    public DuplicateHeader(Project project, int cellIndex, JSONObject options) {
        super(project, cellIndex, options);
    }
}

View File

@ -1,12 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Project;
// NOTE(review): placeholder — does not override checkCell, so it inherits the
// always-false default; it is not wired into ValidatorRegistry.
public class DuplicateRow extends AbstractValidator {
    public DuplicateRow(Project project, int cellIndex, JSONObject options) {
        super(project, cellIndex, options);
    }
}

View File

@ -1,28 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import java.util.List;
import org.json.JSONObject;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import io.frictionlessdata.tableschema.Field;
/**
 * Checks the table-schema "enum" constraint: a cell is valid only when its
 * value is one of the enumerated allowed values.
 */
public class EnumerableConstraint extends AbstractValidator {
    // Allowed values from the column's enum constraint; assumed to be a
    // List<Object> — TODO confirm against the table-schema library.
    private List<Object> enumList;
    
    @SuppressWarnings("unchecked")
    public EnumerableConstraint(Project project, int cellIndex, JSONObject options) {
        super(project, cellIndex, options);
        this.code = "enumerable-constraint";
        
        enumList = (List<Object>) column.getConstraints().get(Field.CONSTRAINT_KEY_ENUM);
    }
    
    @Override
    public boolean checkCell(Cell cell) {
        // XXX: deal with recon
        return enumList.contains(cell.value);
    }
}

View File

@ -1,11 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Project;
// NOTE(review): placeholder — does not override checkCell, so it inherits the
// always-false default; it is not wired into ValidatorRegistry.
public class ExtraHeader extends AbstractValidator {
    public ExtraHeader(Project project, int cellIndex, JSONObject options) {
        super(project, cellIndex, options);
    }
}

View File

@ -1,12 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Project;
// NOTE(review): placeholder — does not override checkCell, so it inherits the
// always-false default; it is not wired into ValidatorRegistry.
public class ExtraValue extends AbstractValidator {
    public ExtraValue(Project project, int cellIndex, JSONObject options) {
        super(project, cellIndex, options);
    }
}

View File

@ -1,39 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import io.frictionlessdata.tableschema.Field;
import io.frictionlessdata.tableschema.exceptions.ConstraintsException;
import io.frictionlessdata.tableschema.exceptions.InvalidCastException;
@SuppressWarnings("rawtypes")
public class MaximumConstraint extends AbstractValidator {
private String threshold;
public MaximumConstraint(Project project, int cellIndex, JSONObject options) throws InvalidCastException, ConstraintsException {
super(project, cellIndex, options);
this.code = "maximum-constraint";
threshold = (String)column.getConstraints()
.get(Field.CONSTRAINT_KEY_MAXIMUM);
}
@SuppressWarnings("unchecked")
@Override
public boolean checkCell(Cell cell) {
boolean valid = true;
try {
Comparable value = column.castValue(cell.value.toString());
// return this - threshold
if (value.compareTo(column.castValue(threshold)) > 0)
valid = false;
} catch (InvalidCastException | ConstraintsException e) {
valid = false;
}
return valid;
}
}

View File

@ -1,26 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import io.frictionlessdata.tableschema.Field;
public class MaximumLengthConstraint extends AbstractValidator {
private int maxLength;
public MaximumLengthConstraint(Project project, int cellIndex, JSONObject options) {
super(project, cellIndex, options);
this.code = "maximum-length-constraint";
maxLength = (int) column.getConstraints()
.get(Field.CONSTRAINT_KEY_MAX_LENGTH);
}
@Override
public boolean checkCell(Cell cell) {
return cell.value.toString().length() <= maxLength;
}
}

View File

@ -1,39 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import io.frictionlessdata.tableschema.Field;
import io.frictionlessdata.tableschema.exceptions.ConstraintsException;
import io.frictionlessdata.tableschema.exceptions.InvalidCastException;
@SuppressWarnings("rawtypes")
public class MinimumConstraint extends AbstractValidator {
private String threshold;
public MinimumConstraint(Project project, int cellIndex, JSONObject options) throws InvalidCastException, ConstraintsException {
super(project, cellIndex, options);
this.code = "minimum-constraint";
threshold = (String)column.getConstraints()
.get(Field.CONSTRAINT_KEY_MINIMUM);
}
@SuppressWarnings("unchecked")
@Override
public boolean checkCell(Cell cell) {
boolean valid = true;
try {
Comparable value = column.castValue(cell.value.toString());
// return this - threshold
if (value.compareTo(column.castValue(threshold)) < 0)
valid = false;
} catch (InvalidCastException | ConstraintsException e) {
valid = false;
}
return valid;
}
}

View File

@ -1,35 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import io.frictionlessdata.tableschema.Field;
public class MinimumLengthConstraint extends AbstractValidator {
private int minLength;
public MinimumLengthConstraint(Project project, int cellIndex, JSONObject options) {
super(project, cellIndex, options);
this.code = "minimum-length-constrain";
minLength = (int)column.getConstraints()
.get(Field.CONSTRAINT_KEY_MIN_LENGTH);
}
@Override
public boolean filter(Cell cell) {
return true;
}
@Override
public boolean checkCell(Cell cell) {
if (cell == null || cell.value == null)
return false;
return cell.value.toString().length() >= minLength;
}
}

View File

@ -1,12 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Project;
public class MissingHeader extends AbstractValidator {
public MissingHeader(Project project, int cellIndex, JSONObject options) {
super(project, cellIndex, options);
}
}

View File

@ -1,12 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Project;
public class MissingValue extends AbstractValidator {
public MissingValue(Project project, int cellIndex, JSONObject options) {
super(project, cellIndex, options);
}
}

View File

@ -1,12 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Project;
public class NonMatchingHeader extends AbstractValidator {
public NonMatchingHeader(Project project, int cellIndex, JSONObject options) {
super(project, cellIndex, options);
}
}

View File

@ -1,30 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONObject;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import io.frictionlessdata.tableschema.Field;
public class PatternConstraint extends AbstractValidator {
private String regexPattern;
public PatternConstraint(Project project, int cellIndex, JSONObject options) {
super(project, cellIndex, options);
this.code = "pattern-constraint";
this.regexPattern = (String)column.getConstraints().get(Field.CONSTRAINT_KEY_PATTERN);
}
@Override
public boolean checkCell(Cell cell) {
Pattern pattern = Pattern.compile(regexPattern);
Matcher matcher = pattern.matcher((String)cell.value);
return matcher.matches();
}
}

View File

@ -1,26 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.apache.commons.lang3.StringUtils;
import org.json.JSONObject;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
public class RequiredConstraint extends AbstractValidator {
public RequiredConstraint(Project project, int cellIndex, JSONObject options) {
super(project, cellIndex, options);
this.code = "required-constraint";
}
@Override
public boolean filter(Cell cell) {
// always check
return false;
}
@Override
public boolean checkCell(Cell cell) {
return StringUtils.isNotBlank(cell.value.toString());
}
}

View File

@ -1,49 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import io.frictionlessdata.tableschema.exceptions.ConstraintsException;
import io.frictionlessdata.tableschema.exceptions.InvalidCastException;
public class TypeorFormatError extends AbstractValidator {
private String type;
private String format;
public TypeorFormatError(Project project, int cellIndex, JSONObject options) {
super(project, cellIndex, options);
this.code = "type-or-format-error";
this.type = column.getType();
this.format = column.getFormat();
}
@Override
public boolean checkCell(Cell cell) {
boolean valid = true;
try {
column.castValue(cell.value.toString());
} catch (InvalidCastException | ConstraintsException e) {
// patch for issue: https://github.com/frictionlessdata/tableschema-java/issues/21
if ("number".equals(type)) {
try {
column.castValue(cell.value.toString() + ".0");
} catch (InvalidCastException | ConstraintsException e1) {
valid = false;
}
} else
valid = false;
}
return valid;
}
@Override
public void customizedFormat() {
lookup.put("field_type", type);
lookup.put("field_format", format);
}
}

View File

@ -1,12 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONObject;
import com.google.refine.model.Project;
public class UniqueConstraint extends AbstractValidator {
public UniqueConstraint(Project project, int cellIndex, JSONObject options) {
super(project, cellIndex, options);
this.code = "unique-constraint";
}
}

View File

@ -1,36 +0,0 @@
package com.google.refine.model.metadata.validator.checks;
import org.json.JSONArray;
import org.json.JSONObject;
import com.google.refine.model.Cell;
public interface Validator {
/**
* Given the options and cell index, apply the validate operation.
* @return
*/
public JSONArray validate();
/**
* Skip if cell is incomplete
* @return
*/
public boolean filter(Cell cell);
/**
* check the cell against the table schema
* @param cell
* @return false if fails the validation / check. Otherwise return true
*/
public boolean checkCell(Cell cell);
/**
* Add error into the report for return
*/
public void addError(JSONObject result);
public JSONObject formatErrorMessage(Cell cell, int rowIndex);
public void customizedFormat();
}

View File

@ -256,7 +256,7 @@ public class ReconciledDataExtensionJob {
String str = val.getString("str");
storeCell(rows, rowindex, colindex, str);
} else if (val.has("float")) {
float v = val.getBigDecimal("float").floatValue();
float v = Float.parseFloat(val.getString("float"));
storeCell(rows, rowindex, colindex, v);
} else if (val.has("int")) {
int v = Integer.parseInt(val.getString("int"));

View File

@ -47,7 +47,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang.StringUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

View File

@ -41,7 +41,6 @@ import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.refine.browsing.Engine;

View File

@ -56,9 +56,6 @@ import com.google.refine.util.ParsingUtilities;
public class PreferenceStore {
public static final String USER_METADATA_KEY = "userMetadata";
// use to populate "creator" filed in metadata. https://github.com/OpenRefine/OpenRefine/issues/1393
public static final String USER_NAME = "username";
private boolean dirty = false;
protected Map<String, Object> _prefs = new HashMap<>();

View File

@ -39,7 +39,7 @@ import java.util.Enumeration;
import java.util.Iterator;
import java.util.Properties;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
@ -157,7 +157,7 @@ public class JSObject extends Properties {
writeJSONObject(writer, (JSONObject) o);
} else {
writer.print("\"" + StringEscapeUtils.escapeEcmaScript(o.toString()) + "\"");
writer.print("\"" + StringEscapeUtils.escapeJavaScript(o.toString()) + "\"");
}
}
}

View File

@ -59,9 +59,9 @@ import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONTokener;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

View File

@ -1,13 +0,0 @@
schema-error=Table Schema error: ${error_message}
non-matching-header=Header in column ${column_number} doesn't match field name ${field_name} in the schema
extra-header=There is an extra header in column ${column_number}
missing-header=There is a missing header in column ${column_number}
type-or-format-error=The value ${value} in row ${row_number} and column ${column_number} is not type ${field_type} and format ${field_format}
required-constraint=Column ${column_number} is a required field, but row ${row_number} has no value
pattern-constraint=The value ${value} in row ${row_number} and column ${column_number} does not conform to the pattern constraint of ${constraint}
unique-constraint=Rows ${row_numbers} has unique constraint violation in column ${column_number}
enumerable-constraint=The value ${value} in row ${row_number} and column ${column_number} does not conform to the given enumeration: ${constraint}
minimum-constraint=The value ${value} in row ${row_number} and column ${column_number} does not conform to the minimum constraint of ${constraint}
maximum-constraint=The value ${value} in row ${row_number} and column ${column_number} does not conform to the maximum constraint of ${constraint}
minimum-length-constraint=The value ${value} in row ${row_number} and column ${column_number} does not conform to the minimum length constraint of ${constraint}
maximum-length-constraint=The value ${value} in row ${row_number} and column ${column_number} does not conform to the maximum length constraint of ${constraint}

Some files were not shown because too many files have changed in this diff Show More