Merge branch 'master' of https://github.com/openrefine/openrefine into add-maven-release-variable

This commit is contained in:
Thad Guidry 2020-04-17 10:45:39 -05:00
commit 63fa607737
30 changed files with 10295 additions and 0 deletions

11
docs/.gitignore vendored Normal file
View File

@ -0,0 +1,11 @@
.DS_Store
node_modules
lib/core/metadata.js
lib/core/MetadataBlog.js
translated_docs
build/
yarn.lock
i18n/*

107
docs/core/Footer.js Normal file
View File

@ -0,0 +1,107 @@
/**
* Copyright (c) 2017-present, Facebook, Inc.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
const React = require('react');
class Footer extends React.Component {
docUrl(doc, language) {
const baseUrl = this.props.config.baseUrl;
const docsUrl = this.props.config.docsUrl;
const docsPart = `${docsUrl ? `${docsUrl}/` : ''}`;
const langPart = `${language ? `${language}/` : ''}`;
return `${baseUrl}${docsPart}${langPart}${doc}`;
}
pageUrl(doc, language) {
const baseUrl = this.props.config.baseUrl;
return baseUrl + (language ? `${language}/` : '') + doc;
}
render() {
return (
<footer className="nav-footer" id="footer">
<section className="sitemap">
<a href={this.props.config.baseUrl} className="nav-home">
{this.props.config.footerIcon && (
<img
src={this.props.config.baseUrl + this.props.config.footerIcon}
alt={this.props.config.title}
width="66"
height="58"
/>
)}
</a>
<div>
<h5>Docs</h5>
<a href={this.docUrl('install.html', this.props.language)}>
Install OpenRefine
</a>
<a href="http://openrefine.org/documentation.html">
External Resources
</a>
</div>
<div>
<h5>Community</h5>
<a
href="https://stackoverflow.com/questions/tagged/openrefine/"
target="_blank"
rel="noreferrer noopener">
Stack Overflow
</a>
<a
href="https://twitter.com/OpenRefine"
target="_blank"
rel="noreferrer noopener">
Twitter
</a>
</div>
<div>
<h5>More</h5>
<a href="https://openrefine.org/category/blog.html">Blog</a>
<a href="https://github.com/OpenRefine/OpenRefine">GitHub</a>
<a
className="github-button"
href={this.props.config.repoUrl}
data-icon="octicon-star"
data-count-href="/OpenRefine/OpenRefine/stargazers"
data-show-count="true"
data-count-aria-label="# stargazers on GitHub"
aria-label="Star this project on GitHub">
Star
</a>
{this.props.config.twitterUsername && (
<div className="social">
<a
href={`https://twitter.com/${this.props.config.twitterUsername}`}
className="twitter-follow-button">
Follow @{this.props.config.twitterUsername}
</a>
</div>
)}
{this.props.config.facebookAppId && (
<div className="social">
<div
className="fb-like"
data-href={this.props.config.url}
data-colorscheme="dark"
data-layout="standard"
data-share="true"
data-width="225"
data-show-faces="false"
/>
</div>
)}
</div>
</section>
<section className="copyright">{this.props.config.copyright}</section>
</footer>
);
}
}
module.exports = Footer;

9725
docs/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

14
docs/package.json Normal file
View File

@ -0,0 +1,14 @@
{
"scripts": {
"examples": "docusaurus-examples",
"start": "docusaurus-start",
"build": "docusaurus-build",
"publish-gh-pages": "docusaurus-publish",
"write-translations": "docusaurus-write-translations",
"version": "docusaurus-version",
"rename-version": "docusaurus-rename-version"
},
"devDependencies": {
"docusaurus": "^1.14.4"
}
}

38
docs/sidebars.json Normal file
View File

@ -0,0 +1,38 @@
{
"docs": {
"Getting Started": [
"index",
"install",
"user_data",
"records_mode"
],
"Importing Data": [
"importers/csv",
"importers/json",
"importers/xml"
],
"Operations": [
"operations/transform",
"operations/add_column",
"operations/fill_down",
"operations/blank_down",
"operations/recon",
"operations/key_value_columnize"
],
"Facets": [
"facets/text",
"facets/numeric",
"facets/scatterplot"
],
"Exporters": [
"exporters/csv",
"exporters/gsheets",
"exporters/ods"
],
"GREL": [
"grel/cross",
"grel/facetCount",
"grel/toString"
]
}
}

81
docs/siteConfig.js Normal file
View File

@ -0,0 +1,81 @@
// Documentation for these options can be found at:
// https://docusaurus.io/docs/en/site-config
/**
 * Docusaurus site configuration for the OpenRefine reference manual.
 *
 * `url` is the host part only and `baseUrl` is the path under that host;
 * the two are concatenated as `url + baseUrl`, so `url` must not end with a
 * slash (otherwise the result contains "//").
 */
const siteConfig = {
  title: 'OpenRefine', // Title for your website.
  tagline: 'A power tool for working with messy data.',
  // No trailing slash here: baseUrl already starts with one.
  url: 'https://openrefine.github.io',
  baseUrl: '/docs-prototype/',
  projectName: 'OpenRefine',
  organizationName: 'OpenRefine',
  // For no header links in the top nav bar -> headerLinks: [],
  headerLinks: [
    {doc: 'index', label: 'Home'},
  ],
  users: [],
  // Location of the Markdown sources.
  customDocsPath: 'docs/src',
  // Empty string: no extra "docs/" segment is inserted into doc URLs.
  docsUrl: '',
  /* path to images for header/footer */
  headerIcon: 'img/openrefine_logo.png',
  footerIcon: 'img/openrefine_logo.png',
  favicon: 'img/openrefine_logo.png',
  /* Colors for website */
  colors: {
    primaryColor: '#196581',
    secondaryColor: '#5a4411',
  },
  /* Custom fonts for website */
  /*
  fonts: {
    myFont: [
      "Times New Roman",
      "Serif"
    ],
    myOtherFont: [
      "-apple-system",
      "system-ui"
    ]
  },
  */
  // The year is computed when the site is built.
  copyright: `Copyright © ${new Date().getFullYear()} OpenRefine contributors`,
  highlight: {
    // Highlight.js theme to use for syntax highlighting in code blocks.
    theme: 'default',
  },
  // Add custom scripts here that would be placed in <script> tags.
  scripts: ['https://buttons.github.io/buttons.js'],
  // On page navigation for the current documentation page.
  onPageNav: 'separate',
  // Keep the .html extension in page paths (set to true for extension-less
  // URLs). The footer links to 'install.html', which relies on this.
  cleanUrl: false,
  // Open Graph and Twitter card images.
  ogImage: 'img/openrefine_logo.png',
  twitterImage: 'img/openrefine_logo.png',
  // Expand/collapse the links and subcategories under categories.
  docsSideNavCollapsible: true,
  // Show documentation's last contributor's name.
  // enableUpdateBy: true,
  // Show documentation's last update time.
  // enableUpdateTime: true,
  // You may provide arbitrary config keys to be used as needed by your
  // template. For example, if you need your repo's URL...
  repoUrl: 'https://github.com/OpenRefine/OpenRefine',
  twitterUsername: 'OpenRefine',
};
module.exports = siteConfig;

View File

@ -0,0 +1,7 @@
---
id: csv
title: Exporting to CSV/TSV
sidebar_label: CSV/TSV
---

View File

@ -0,0 +1,6 @@
---
id: gsheets
title: Exporting to Google Sheets
sidebar_label: Google Sheets
---

View File

@ -0,0 +1,6 @@
---
id: ods
title: Exporting to ODS
sidebar_label: ODS
---

View File

@ -0,0 +1,7 @@
---
id: numeric
title: Numeric Facets
sidebar_label: Numeric
---
Numeric facets let you inspect the distribution of numeric values in a column.

View File

@ -0,0 +1,7 @@
---
id: scatterplot
title: Scatterplot Facets
sidebar_label: Scatterplot
---

7
docs/src/facets/text.md Normal file
View File

@ -0,0 +1,7 @@
---
id: text
title: Text Facets
sidebar_label: Text
---
Text facets list the different values found in a column, with their occurrence counts.

6
docs/src/grel/cross.md Normal file
View File

@ -0,0 +1,6 @@
---
id: cross
title: The cross function
sidebar_label: cross
---

View File

@ -0,0 +1,6 @@
---
id: facetCount
title: The facetCount function
sidebar_label: facetCount
---

View File

@ -0,0 +1,6 @@
---
id: toString
title: The toString function
sidebar_label: toString
---

View File

@ -0,0 +1,7 @@
---
id: csv
title: Importing CSV/TSV files
sidebar_label: CSV/TSV
---
Comma-Separated Values (CSV) and Tab-Separated Values (TSV) are widespread formats to represent tables.

View File

@ -0,0 +1,7 @@
---
id: json
title: Importing JSON files
sidebar_label: JSON
---
JSON (JavaScript Object Notation) is a hierarchical data format commonly used on the Web.

View File

@ -0,0 +1,7 @@
---
id: xml
title: Importing XML files
sidebar_label: XML
---

9
docs/src/index.md Normal file
View File

@ -0,0 +1,9 @@
---
id: index
title: OpenRefine reference manual
sidebar_label: Index
---
This site is just a prototype to propose a basic publishing infrastructure for OpenRefine's new reference manual.
The structure on the left hand side is just a placeholder, as are the contents of all pages.

17
docs/src/install.md Normal file
View File

@ -0,0 +1,17 @@
---
id: install
title: Installing OpenRefine
sidebar_label: Installing
---
OpenRefine is available for Windows, Mac OS and Linux. The install process depends on the platform.
# Windows
# Mac OS
# Linux
# Using Docker

View File

@ -0,0 +1,7 @@
---
id: add_column
title: Add Column Based on This Column
sidebar_label: Add Column
---
This operation lets you create a new column, whose values are computed using GREL or another expression language.

View File

@ -0,0 +1,7 @@
---
id: blank_down
title: Blank down
sidebar_label: Blank down
---

View File

@ -0,0 +1,7 @@
---
id: fill_down
title: Fill down
sidebar_label: Fill down
---

View File

@ -0,0 +1,152 @@
---
id: key_value_columnize
title: Columnize by key/value columns
sidebar_label: Columnize by key/value
---
This operation can be used to reshape a table which contains *key* and *value* columns, such that the repeating contents in the key column become new column names, and the contents of the value column are spread in the new columns. This operation can be invoked from
any column menu, via **Transpose** → **Columnize by key/value columns**.
Overview
--------
Consider the following table:
| Field | Data |
|---------|-----------------------|
| Name | Galanthus nivalis |
| Color | White |
| IUCN ID | 162168 |
| Name | Narcissus cyclamineus |
| Color | Yellow |
| IUCN ID | 161899 |
In this format, each flower species is described by multiple attributes, which are spread on consecutive rows.
In this example, the "Field" column contains the keys and the "Data" column contains the values. With
this configuration, the *Columnize by key/value columns* operation transforms this table as follows:
| Name | Color | IUCN ID |
|-----------------------|----------|---------|
| Galanthus nivalis | White | 162168 |
| Narcissus cyclamineus | Yellow | 161899 |
Entries with multiple values in the same column
-----------------------------------------------
If an entry has multiple values for a given key, then these values will be grouped on consecutive rows,
to form a [record structure](../records_mode.md).
For instance, flower species can have multiple colors:
| Field | Data |
|-------------|-----------------------|
| Name | Galanthus nivalis |
| **Color** | **White** |
| **Color** | **Green** |
| IUCN ID | 162168 |
| Name | Narcissus cyclamineus |
| Color | Yellow |
| IUCN ID | 161899 |
This table is transformed by the operation as follows:
| Name | Color | IUCN ID |
|-----------------------|----------|---------|
| Galanthus nivalis | White | 162168 |
| | Green | |
| Narcissus cyclamineus | Yellow | 161899 |
The first key encountered by the operation serves as the record key.
The "Green" value is attached to the "Galanthus nivalis" name because it is the latest record key encountered by the operation as it scans the table. See the [Row order](#row-order) section for more details about the influence of row order on
the results of the operation.
Notes column
------------
In addition to the key and value columns, a *notes* column can be used optionally. This can be used
to store extra metadata associated with a key/value pair.
Consider the following example:
| Field | Data | Source |
|---------|-----------------------|-----------------------|
| Name | Galanthus nivalis | IUCN |
| Color | White | Contributed by Martha |
| IUCN ID | 162168 | |
| Name | Narcissus cyclamineus | Legacy |
| Color | Yellow | 2009 survey |
| IUCN ID | 161899 | |
If the "Source" column is selected as notes column, this table is transformed to:
| Name | Color | IUCN ID | Source : Name | Source : Color |
|-----------------------|----------|---------|---------------|-----------------------|
| Galanthus nivalis | White | 162168 | IUCN | Contributed by Martha |
| Narcissus cyclamineus | Yellow | 161899 | Legacy | 2009 survey |
Notes columns can therefore be used to preserve provenance or other context about a particular key/value pair.
Extra columns
-------------
If the table contains extra columns, which are not used as key, value or notes columns, they can be preserved
by the operation. For this to work, they must have the same value in all old rows corresponding to a new row.
Consider for instance the following table, where the "Field" and "Data" columns are used as key and value columns
respectively, and the "Wikidata ID" column is not selected:
| Field | Data | Wikidata ID |
|---------|-----------------------|-------------|
| Name | Galanthus nivalis | Q109995 |
| Color | White | Q109995 |
| IUCN ID | 162168 | Q109995 |
| Name | Narcissus cyclamineus | Q1727024 |
| Color | Yellow | Q1727024 |
| IUCN ID | 161899 | Q1727024 |
This will be transformed to:
| Wikidata ID | Name | Color | IUCN ID |
|-------------|-----------------------|----------|---------|
| Q109995 | Galanthus nivalis | White | 162168 |
| Q1727024 | Narcissus cyclamineus | Yellow | 161899 |
If extra columns do not contain identical values for all old rows spanning an entry, this can
be fixed beforehand by using the [fill down operation](fill_down.md).
Row order
---------
In the absence of extra columns, it is important to note that the order in which
the key/value pairs appear matters. Specifically, the operation will use the first key it encounters as the delimiter for entries:
every time it encounters this key again, it will produce a new row and add the following other key/value pairs to that row.
Consider for instance the following table:
| Field | Data |
|----------|-----------------------|
| **Name** | Galanthus nivalis |
| Color | White |
| IUCN ID | 162168 |
| **Name** | Crinum variabile |
| **Name** | Narcissus cyclamineus |
| Color | Yellow |
| IUCN ID | 161899 |
The occurrences of the "Name" value in the "Field" column define the boundaries of the entries. Because there is
no other row between the "Crinum variabile" and the "Narcissus cyclamineus" rows, the "Color" and "IUCN ID" columns
for the "Crinum variabile" entry will be empty:
| Name | Color | IUCN ID |
|-----------------------|----------|---------|
| Galanthus nivalis | White | 162168 |
| Crinum variabile | | |
| Narcissus cyclamineus | Yellow | 161899 |
This sensitivity to order is removed if there are extra columns: in that case, the first extra column will serve as root identifier
for the entries.
Behaviour in records mode
-------------------------
In records mode, this operation behaves just like in rows mode, except that any facets applied to it will be interpreted in records mode.

View File

@ -0,0 +1,7 @@
---
id: recon
title: Reconcile
sidebar_label: Reconcile
---
Reconciliation is the process of matching names to unique identifiers in third-party databases.

View File

@ -0,0 +1,7 @@
---
id: transform
title: Transforming Columns
sidebar_label: Transform
---
Transforming a column lets you derive new column values using expressions in GREL or other languages.

8
docs/src/records_mode.md Normal file
View File

@ -0,0 +1,8 @@
---
id: records_mode
title: The records mode
sidebar_label: Records mode
---

17
docs/src/user_data.md Normal file
View File

@ -0,0 +1,17 @@
---
id: user_data
title: Backing Up Workspace Data
sidebar_label: Workspace Data
---
OpenRefine stores its workspace locally. The exact location depends on the operating system.
# Windows
# Mac OS
# Linux
# Using Docker

2
docs/static/css/custom.css vendored Normal file
View File

@ -0,0 +1,2 @@
/* custom CSS can be added here */

BIN
docs/static/img/openrefine_logo.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 KiB