Merge branch 'docs-formatting' of https://github.com/allanaaa/OpenRefine into docs-formatting

This commit is contained in:
allanaaa 2021-01-05 12:07:32 -05:00
commit 4a4be06b24
56 changed files with 1138 additions and 756 deletions

View File

@ -1,104 +0,0 @@
# Legacy "Java CI" workflow: on every push to master it builds OpenRefine,
# generates the dist files and uploads the Windows, Linux and Mac packages
# as release assets.
name: Java CI
on:
push:
branches:
- master
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2.3.4
with:
fetch-depth: 0 # This is wasteful, but needed for git describe
# Reuse the local Maven repository between runs, keyed on the pom.xml hashes.
- name: Restore dependency cache
uses: actions/cache@v2.1.3
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
- name: Set up JDK 1.8
uses: actions/setup-java@v1
with:
java-version: 1.8
- name: Install genisoimage and jq
run: sudo apt-get install genisoimage jq
- name: Build with Maven
run: ./refine build
# Reads git.commit.id.describe from git.properties with jq and exports it as
# OR_VERSION for the following steps. The ::set-env workflow command is
# explicitly enabled through ACTIONS_ALLOW_UNSECURE_COMMANDS.
- name: Get the OpenRefine snapshot version
run: echo ::set-env name=OR_VERSION::$(cat ./main/webapp/WEB-INF/classes/git.properties | jq -r '.["git.commit.id.describe"]')
env:
ACTIONS_ALLOW_UNSECURE_COMMANDS: true
- name: Generate dist files
run: ./refine dist ${{ env.OR_VERSION }}
# All release-related steps below only run in the OpenRefine/OpenRefine
# repository. The output of release_manager.sh is stored in API_RELEASE,
# which the upload steps pass as upload_url.
- name: Create Release
if: github.repository == 'OpenRefine/OpenRefine'
id: create_release
run: echo ::set-env name=API_RELEASE::$(./.github/workflows/release_manager.sh)
env:
ACTIONS_ALLOW_UNSECURE_COMMANDS: true
OR_VERSION: ${{ env.OR_VERSION }}
RELEASE_REPO_OWNER: OpenRefine
RELEASE_REPO_TOKEN: ${{ secrets.RELEASE_REPO_TOKEN }}
- name: Upload Release Asset Windows
id: upload-release-asset-win
if: github.repository == 'OpenRefine/OpenRefine'
uses: actions/upload-release-asset@v1.0.2
env:
GITHUB_TOKEN: ${{ secrets.RELEASE_REPO_TOKEN }}
with:
upload_url: ${{ env.API_RELEASE }}
asset_path: ./packaging/target/openrefine-win-${{ env.OR_VERSION }}.zip
asset_name: openrefine-win-${{ env.OR_VERSION }}.zip
asset_content_type: application/zip
- name: Upload Release Asset Linux
id: upload-release-asset-linux
if: github.repository == 'OpenRefine/OpenRefine'
uses: actions/upload-release-asset@v1.0.2
env:
GITHUB_TOKEN: ${{ secrets.RELEASE_REPO_TOKEN }}
with:
upload_url: ${{ env.API_RELEASE }}
asset_path: ./packaging/target/openrefine-linux-${{ env.OR_VERSION }}.tar.gz
asset_name: openrefine-linux-${{ env.OR_VERSION }}.tar.gz
asset_content_type: application/tar+gzip
- name: Upload Release Asset Mac
id: upload-release-asset-mac
if: github.repository == 'OpenRefine/OpenRefine'
uses: actions/upload-release-asset@v1.0.2
env:
GITHUB_TOKEN: ${{ secrets.RELEASE_REPO_TOKEN }}
with:
upload_url: ${{ env.API_RELEASE }}
asset_path: ./packaging/target/openrefine-mac-${{ env.OR_VERSION }}.dmg
asset_name: openrefine-mac-${{ env.OR_VERSION }}.dmg
asset_content_type: application/x-apple-diskimage
# Prunes the OpenRefine/OpenRefine-snapshot-releases repository down to the
# 10 most recent releases, deleting the tags of the removed releases too.
- name: Delete older releases
id: delete-older-releases
if: github.repository == 'OpenRefine/OpenRefine'
uses: dev-drprasad/delete-older-releases@v0.1.0
with:
repo: OpenRefine/OpenRefine-snapshot-releases
# Specifies number of latest releases (sorted by created_at) to keep. Pass 0 if you want to delete all releases
keep_latest: 10
# Specifies whether to delete tags associated to older releases or not.
# Older tags without any associated releases will not be deleted
delete_tags: true
env:
GITHUB_TOKEN: ${{ secrets.RELEASE_REPO_TOKEN }}

119
.github/workflows/pull_request.yml vendored Normal file
View File

@ -0,0 +1,119 @@
# CI for pull requests: runs the server-side test suite (with coverage) on
# Java 8 and 14 against MySQL and PostgreSQL service containers, and the
# Cypress UI tests on Chrome.
name: Continuous Integration
# NOTE(review): pull_request_target runs in the context of the base repository
# and has access to its secrets; confirm that actions/checkout (used below
# without an explicit ref) checks out the code this workflow is meant to test.
on: [pull_request_target]
jobs:
  server_tests:
    strategy:
      matrix:
        java: [8, 14]
    runs-on: ubuntu-latest
    services:
      # Only the container port is listed for each service; the steps below
      # read the mapped host port from job.services.<name>.ports[...].
      postgres:
        image: postgres
        ports:
          - 5432
        env:
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: 'postgres'
          POSTGRES_DB: test_db
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
      mysql:
        image: mysql:8
        ports:
          - 3306
        env:
          MYSQL_ROOT_PASSWORD: root
        options: >-
          --health-cmd "mysqladmin ping"
          --health-interval 5s
          --health-timeout 2s
          --health-retries 3
    steps:
      - uses: actions/checkout@v2.3.4
      # Reuse the local Maven repository between runs, keyed on pom.xml hashes.
      - name: Restore dependency cache
        uses: actions/cache@v2.1.3
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-
      - name: Set up Java ${{ matrix.java }}
        uses: actions/setup-java@v1
        with:
          java-version: ${{ matrix.java }}
      # Substitute the MYSQL_PORT / POSTGRES_PORT placeholders of the template
      # with the mapped service ports and write the result to tests.xml.
      - name: Configure connections to databases
        id: configure_db_connections
        run: |
          sed -e "s/MYSQL_PORT/${{ job.services.mysql.ports[3306] }}/g" \
              -e "s/POSTGRES_PORT/${{ job.services.postgres.ports[5432] }}/g" \
              extensions/database/tests/conf/github_actions_tests.xml \
              > extensions/database/tests/conf/tests.xml
      - name: Populate databases with test data
        id: populate_databases_with_test_data
        run: |
          mysql -u root -h 127.0.0.1 -P ${{ job.services.mysql.ports[3306] }} -proot -e 'CREATE DATABASE test_db;'
          mysql -u root -h 127.0.0.1 -P ${{ job.services.mysql.ports[3306] }} -proot < extensions/database/tests/conf/test-mysql.sql
          psql -U postgres test_db -h 127.0.0.1 -p ${{ job.services.postgres.ports[5432] }} < extensions/database/tests/conf/test-pgsql.sql
        env:
          PGPASSWORD: postgres
      - name: Build and test with Maven
        run: mvn jacoco:prepare-agent test
      - name: Submit test coverage to Coveralls
        run: |
          mvn prepare-package -DskipTests=true
          mvn jacoco:report coveralls:report -DrepoToken=${{ secrets.COVERALLS_TOKEN }} -DpullRequest=${{ github.event.number }}
  ui_tests:
    strategy:
      matrix:
        browser: ['chrome']
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2.3.4
      - name: Restore dependency cache
        uses: actions/cache@v2.1.3
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-
      - name: Set up Java 8
        uses: actions/setup-java@v1
        with:
          java-version: 8
      - name: Build OpenRefine
        run: ./refine build
      - name: Restore Tests dependency cache
        uses: actions/cache@v2.1.3
        with:
          path: '**/node_modules'
          key: ${{ runner.os }}-modules-${{ hashFiles('**/yarn.lock') }}
          # The fallback must be a prefix of `key`, otherwise it never matches
          # a previously saved cache.
          restore-keys: |
            ${{ runner.os }}-modules-
      - name: Install test dependencies
        run: |
          cd ./main/tests/cypress
          yarn install
      # Give OpenRefine more memory for the UI run, then launch the Cypress
      # suite through the refine script.
      - name: Test with Cypress on ${{ matrix.browser }}
        run: |
          echo REFINE_MIN_MEMORY=1400M >> ./refine.ini
          echo REFINE_MEMORY=4096M >> ./refine.ini
          ./refine ui_test ${{ matrix.browser }} s5du3k "${{ secrets.CYPRESS_RECORD_KEY }}"

203
.github/workflows/snapshot_release.yml vendored Normal file
View File

@ -0,0 +1,203 @@
# Snapshot release pipeline: on every push to master, runs the Cypress UI
# tests on Edge and Chrome and, in a separate job, runs the server tests with
# coverage, builds the dist packages and publishes them as release assets.
name: Snapshot release
on:
  push:
    branches:
      - master
jobs:
  cypress_tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        browser: ['edge', 'chrome']
    steps:
      - uses: actions/checkout@v2.3.4
      # Reuse the local Maven repository between runs, keyed on pom.xml hashes.
      - name: Restore dependency cache
        uses: actions/cache@v2.1.3
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-
      - name: Set up Java 8
        uses: actions/setup-java@v1
        with:
          java-version: 8
      # Registers the Microsoft package signing key and apt repository, then
      # installs the Edge dev channel package.
      - name: Install Edge
        if: matrix.browser == 'edge'
        run: |
          sudo curl https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > microsoft.gpg
          sudo install -o root -g root -m 644 microsoft.gpg /etc/apt/trusted.gpg.d/
          sudo sh -c 'echo "deb [arch=amd64] https://packages.microsoft.com/repos/edge stable main" > /etc/apt/sources.list.d/microsoft-edge-dev.list'
          sudo rm microsoft.gpg
          sudo apt-get update
          sudo apt-get install microsoft-edge-dev
      - name: Build OpenRefine
        run: ./refine build
      - name: Restore Tests dependency cache
        uses: actions/cache@v2.1.3
        with:
          path: '**/node_modules'
          key: ${{ runner.os }}-modules-${{ hashFiles('**/yarn.lock') }}
          # The fallback must be a prefix of `key`, otherwise it never matches
          # a previously saved cache.
          restore-keys: |
            ${{ runner.os }}-modules-
      - name: Install test dependencies
        run: |
          cd ./main/tests/cypress
          yarn install
      # Give OpenRefine more memory for the UI run, then launch the Cypress
      # suite through the refine script.
      - name: Test with Cypress on ${{ matrix.browser }}
        run: |
          echo REFINE_MIN_MEMORY=1400M >> ./refine.ini
          echo REFINE_MEMORY=4096M >> ./refine.ini
          ./refine ui_test ${{ matrix.browser }} s5du3k "${{ secrets.CYPRESS_RECORD_KEY }}"
  build:
    services:
      # Only the container port is listed for each service; the steps below
      # read the mapped host port from job.services.<name>.ports[...].
      postgres:
        image: postgres
        ports:
          - 5432
        env:
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: 'postgres'
          POSTGRES_DB: test_db
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
      mysql:
        image: mysql:8
        ports:
          - 3306
        env:
          MYSQL_ROOT_PASSWORD: root
        options: >-
          --health-cmd "mysqladmin ping"
          --health-interval 5s
          --health-timeout 2s
          --health-retries 3
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2.3.4
        with:
          fetch-depth: 0 # This is wasteful, but needed for git describe
      - name: Restore dependency cache
        uses: actions/cache@v2.1.3
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-
      - name: Set up JDK 1.8
        uses: actions/setup-java@v1
        with:
          # Quoted so the version is passed as a string, not parsed as a float.
          java-version: '1.8'
      - name: Install genisoimage and jq
        run: sudo apt-get install genisoimage jq
      # Substitute the MYSQL_PORT / POSTGRES_PORT placeholders of the template
      # with the mapped service ports and write the result to tests.xml.
      - name: Configure connections to databases
        id: configure_db_connections
        run: |
          sed -e "s/MYSQL_PORT/${{ job.services.mysql.ports[3306] }}/g" \
              -e "s/POSTGRES_PORT/${{ job.services.postgres.ports[5432] }}/g" \
              extensions/database/tests/conf/github_actions_tests.xml \
              > extensions/database/tests/conf/tests.xml
      - name: Populate databases with test data
        id: populate_databases_with_test_data
        run: |
          mysql -u root -h 127.0.0.1 -P ${{ job.services.mysql.ports[3306] }} -proot -e 'CREATE DATABASE test_db;'
          mysql -u root -h 127.0.0.1 -P ${{ job.services.mysql.ports[3306] }} -proot < extensions/database/tests/conf/test-mysql.sql
          psql -U postgres test_db -h 127.0.0.1 -p ${{ job.services.postgres.ports[5432] }} < extensions/database/tests/conf/test-pgsql.sql
        env:
          PGPASSWORD: postgres
      - name: Build and test with Maven
        run: mvn jacoco:prepare-agent test
      # The run id comes from the `github` context; the `env` context only
      # contains variables defined in the workflow, so env.GITHUB_RUN_ID
      # would expand to an empty string.
      # NOTE(review): github.event.number is presumably empty on push events,
      # so -DpullRequest is likely passed without a value here; confirm whether
      # it can be dropped.
      - name: Submit test coverage to Coveralls
        run: |
          mvn prepare-package -DskipTests=true
          mvn jacoco:report coveralls:report -DrepoToken=${{ secrets.COVERALLS_TOKEN }} -DpullRequest=${{ github.event.number }} -DserviceName="GitHub Actions" -DserviceBuildNumber=${{ github.run_id }} -Dbranch=master
      # Reads git.commit.id.describe from git.properties and exports it as
      # OR_VERSION for the following steps through the GITHUB_ENV file, which
      # replaces the deprecated ::set-env workflow command.
      - name: Get the OpenRefine snapshot version
        run: |
          echo "OR_VERSION=$(jq -r '.["git.commit.id.describe"]' ./main/webapp/WEB-INF/classes/git.properties)" >> "$GITHUB_ENV"
      - name: Generate dist files
        run: ./refine dist ${{ env.OR_VERSION }}
      # All release-related steps below only run in the OpenRefine/OpenRefine
      # repository. The output of release_manager.sh is stored in API_RELEASE,
      # which the upload steps pass as upload_url.
      - name: Create Release
        if: github.repository == 'OpenRefine/OpenRefine'
        id: create_release
        run: |
          echo "API_RELEASE=$(./.github/workflows/release_manager.sh)" >> "$GITHUB_ENV"
        env:
          OR_VERSION: ${{ env.OR_VERSION }}
          RELEASE_REPO_OWNER: OpenRefine
          RELEASE_REPO_TOKEN: ${{ secrets.RELEASE_REPO_TOKEN }}
      - name: Upload Release Asset Windows
        id: upload-release-asset-win
        if: github.repository == 'OpenRefine/OpenRefine'
        uses: actions/upload-release-asset@v1.0.2
        env:
          GITHUB_TOKEN: ${{ secrets.RELEASE_REPO_TOKEN }}
        with:
          upload_url: ${{ env.API_RELEASE }}
          asset_path: ./packaging/target/openrefine-win-${{ env.OR_VERSION }}.zip
          asset_name: openrefine-win-${{ env.OR_VERSION }}.zip
          asset_content_type: application/zip
      - name: Upload Release Asset Linux
        id: upload-release-asset-linux
        if: github.repository == 'OpenRefine/OpenRefine'
        uses: actions/upload-release-asset@v1.0.2
        env:
          GITHUB_TOKEN: ${{ secrets.RELEASE_REPO_TOKEN }}
        with:
          upload_url: ${{ env.API_RELEASE }}
          asset_path: ./packaging/target/openrefine-linux-${{ env.OR_VERSION }}.tar.gz
          asset_name: openrefine-linux-${{ env.OR_VERSION }}.tar.gz
          asset_content_type: application/tar+gzip
      - name: Upload Release Asset Mac
        id: upload-release-asset-mac
        if: github.repository == 'OpenRefine/OpenRefine'
        uses: actions/upload-release-asset@v1.0.2
        env:
          GITHUB_TOKEN: ${{ secrets.RELEASE_REPO_TOKEN }}
        with:
          upload_url: ${{ env.API_RELEASE }}
          asset_path: ./packaging/target/openrefine-mac-${{ env.OR_VERSION }}.dmg
          asset_name: openrefine-mac-${{ env.OR_VERSION }}.dmg
          asset_content_type: application/x-apple-diskimage
      # Prunes the snapshot-releases repository down to the 10 most recent
      # releases, deleting the tags of the removed releases too.
      - name: Delete older releases
        id: delete-older-releases
        if: github.repository == 'OpenRefine/OpenRefine'
        uses: dev-drprasad/delete-older-releases@v0.1.0
        with:
          repo: OpenRefine/OpenRefine-snapshot-releases
          # Specifies number of latest releases (sorted by created_at) to keep. Pass 0 if you want to delete all releases
          keep_latest: 10
          # Specifies whether to delete tags associated to older releases or not.
          # Older tags without any associated releases will not be deleted
          delete_tags: true
        env:
          GITHUB_TOKEN: ${{ secrets.RELEASE_REPO_TOKEN }}

View File

@ -1,85 +0,0 @@
# Legacy Travis CI configuration: runs the Maven test suite on several JDKs
# with MySQL/MariaDB and PostgreSQL test databases, and reports coverage.
os: linux
language: java
dist: bionic
jobs:
include:
- dist: trusty # Fastest build first & for all builds
jdk: oraclejdk8 # Trusty default
# Full matrix only for merges to master or anything to do with release branches e.g. v3.5
- if: (branch = master AND type != pull_request) OR branch ~= /^v\d\.\d.*/
jdk: openjdk11 # Bionic default
- if: branch = master AND type != pull_request OR branch ~= /^v\d\.\d.*/
jdk: openjdk12
dist: xenial # just for a little variety
- if: branch = master AND type != pull_request OR branch ~= /^v\d\.\d.*/
jdk: openjdk13
- if: branch = master AND type != pull_request OR branch ~= /^v\d\.\d.*/
jdk: openjdk14 # replacement for OS X Java 14 build
# macOS entry: databases are installed and started through Homebrew in this
# entry's own before_script instead of the top-level `services` list.
- if: branch = master AND type != pull_request OR branch ~= /^v\d\.\d.*/
os: osx
osx_image: xcode11.6 # macOS 10.15.4, Oracle JDK 14.0.1
language: java
services: # not supported on os x
addons:
homebrew:
packages:
- mysql
- mariadb@10.3
before_script:
- brew services start mysql
- brew services start postgresql
- brew services start mariadb@10.3
- sleep 15 # wait for databases to start up
# Homebrew postgres workaround - create expected user postgres
- /usr/local/opt/postgres/bin/createuser -s postgres
# FIXME this is duplicated from linux config, but don't know a better way to do it
- mysql -u root -e 'CREATE DATABASE test_db;'
- mysql -u root test_db < extensions/database/tests/conf/travis-mysql.sql
- psql -c 'CREATE DATABASE test_db;' -U postgres
- psql -U postgres test_db < extensions/database/tests/conf/travis-pgsql.sql
- cp extensions/database/tests/conf/travis_tests.xml extensions/database/tests/conf/tests.xml
- if: branch = master AND type != pull_request OR branch ~= /^v\d\.\d.*/
jdk: oraclejdk-ea
- if: branch = master AND type != pull_request OR branch ~= /^v\d\.\d.*/
jdk: openjdk-ea
# Entries matching these keys are listed as allowed to fail.
allow_failures:
- os: osx
- jdk: openjdk-ea
- jdk: oraclejdk-ea
addons:
mariadb: '10.3'
services:
- mysql
- postgresql
env:
# encrypted Codacy key, see https://docs.travis-ci.com/user/encryption-keys/
- secure: "VmS4He99YlI6rdmw8Q25OZ9kUp11sRbt0W1QMBvA5lzNSmhN1Q1KtaMj9AGwpCZWcyGWri4AQxEmloARxACxQHXRmNE7ro2DESGw46RAocBAf+RfBxYTifIyUGu5TnSCQhz56SkgpyWpedZAZWyah9ZxgUMfet4KXFUfeiUgYQA="
before_install:
# Fake out packaging for Travis builds before mvn install
- cp packaging/travis_pom.xml packaging/pom.xml
- mvn process-resources
before_script:
# create test databases for mysql, mariadb and postgresql
- mysql -u root -e 'CREATE DATABASE test_db;'
- mysql -u root test_db < extensions/database/tests/conf/travis-mysql.sql
- psql -c 'CREATE DATABASE test_db;' -U postgres
- psql -U postgres test_db < extensions/database/tests/conf/travis-pgsql.sql
- cp extensions/database/tests/conf/travis_tests.xml extensions/database/tests/conf/tests.xml
# Run the tests with the JaCoCo agent attached; on success, build the coverage
# report and submit it to Coveralls.
script:
- mvn jacoco:prepare-agent test
after_success:
- mvn prepare-package -DskipTests=true
- mvn jacoco:report coveralls:report
# Keep the Maven home directory between builds.
cache:
directories:
- $HOME/.m2

View File

@ -1,6 +1,6 @@
# OpenRefine
[![Join the chat at https://gitter.im/OpenRefine/OpenRefine](https://badges.gitter.im/OpenRefine/OpenRefine.svg)](https://gitter.im/OpenRefine/OpenRefine?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Build Status](https://travis-ci.com/OpenRefine/OpenRefine.svg?branch=master)](https://travis-ci.com/OpenRefine/OpenRefine) [![Coverage Status](https://coveralls.io/repos/github/OpenRefine/OpenRefine/badge.svg?branch=master)](https://coveralls.io/github/OpenRefine/OpenRefine?branch=master) [![Translation progress](https://hosted.weblate.org/widgets/openrefine/-/svg-badge.svg)](https://hosted.weblate.org/engage/openrefine/?utm_source=widget) [![Total alerts](https://img.shields.io/lgtm/alerts/g/OpenRefine/OpenRefine.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/OpenRefine/OpenRefine/alerts/)
[![Join the chat at https://gitter.im/OpenRefine/OpenRefine](https://badges.gitter.im/OpenRefine/OpenRefine.svg)](https://gitter.im/OpenRefine/OpenRefine) ![Java CI](https://github.com/OpenRefine/OpenRefine/workflows/Java%20CI/badge.svg) [![Coverage Status](https://coveralls.io/repos/github/OpenRefine/OpenRefine/badge.svg?branch=master)](https://coveralls.io/github/OpenRefine/OpenRefine?branch=master) [![Translation progress](https://hosted.weblate.org/widgets/openrefine/-/svg-badge.svg)](https://hosted.weblate.org/engage/openrefine/?utm_source=widget) [![Total alerts](https://img.shields.io/lgtm/alerts/g/OpenRefine/OpenRefine.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/OpenRefine/OpenRefine/alerts/)
OpenRefine is a Java-based power tool that allows you to load data, understand it,
clean it up, reconcile it, and augment it with data coming from

View File

@ -26,7 +26,7 @@ before_test:
PATH=C:\Program Files\PostgreSQL\9.6\bin\;C:\Program Files\MySQL\MySQL Server 5.7\bin\;%PATH%
SET MYSQL_PWD=Password12!
mysql -u root --password=Password12! -e "create database test_db;"
mysql -u root test_db --password=Password12! < extensions\database\tests\conf\travis-mysql.sql
mysql -u root test_db --password=Password12! < extensions\database\tests\conf\test-mysql.sql
echo "localhost:*:test_db:postgres:Password12!" > C:\Program Files\PostgreSQL\9.6\pgpass.conf
echo "localhost:*:test_db:postgres:Password12!" > pgpass.conf
echo "localhost:*:test_db:postgres:Password12!" > %userprofile%\pgpass.conf
@ -34,10 +34,10 @@ before_test:
SET PGPASSWORD=Password12!
SET PGUSER=postgres
createdb test_db
psql -U postgres test_db < extensions\database\tests\conf\travis-pgsql.sql
psql -U postgres test_db < extensions\database\tests\conf\test-pgsql.sql
copy extensions\database\tests\conf\appveyor_tests.xml extensions\database\tests\conf\tests.xml
copy packaging\travis_pom.xml packaging\pom.xml
copy packaging\test_pom.xml packaging\pom.xml
- cmd: |-
mvn process-resources
mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -V

View File

@ -6,34 +6,117 @@ sidebar_label: Exporting
## Overview
Once your data is cleaned, you will need to get it out of OpenRefine and into the system of your choice. OpenRefine outputs a number of file formats, can upload your data directly into Google Sheets, and can create or update statements on Wikidata.
You can also [export your full project data](#export-a-project) so that it can be opened by someone else using OpenRefine (or yourself, on another computer).
## Export data
Note you will only export data in the current view - that is, with current filters and facets applied.
Many of the following options only export data in the current view - that is, with current filters and facets applied. Some will give you the choice to export your entire dataset or just your current view.
To export from a project, click the <span class="menuItems">Export</span> dropdown button at the top right corner and pick the format you want. Your options are:
* TSV/CSV
* HTML table
* Tab-separated value (TSV) or Comma-separated value (CSV)
* HTML-formatted table
* Excel (XLS or XLSX)
* ODF spreadsheet
* Google Sheets \
* Upload to Google Sheets (requires [Google account authorization](starting#google-sheet-from-drive))
* [Custom tabular exporter](#custom-tabular-exporter)
* [SQL statement exporter](#sql-statement-exporter)
* [Templating exporter](#templating-exporter)
* Custom tabular export
* SQL
* Templating export \
You can also export reconciled data to Wikidata, or export your Wikidata schema for future use with other OpenRefine projects:
* Upload edits to Wikidata
* Export to QuickStatement
* Export Wikidata schema
* [Upload edits to Wikidata](wikidata#upload-edits-to-wikidata)
* [Export to QuickStatements](wikidata#quickstatements-export) (version 1)
* [Export Wikidata schema](wikidata#import-and-export-schema)
### Custom tabular exporter
![A screenshot of the custom tabular content tab.](/img/custom-tabular-exporter.png)
With the custom tabular exporter, you can choose which of your data to export, the separator you wish to use, and whether you'd like to download it to your computer or upload it into a Google Sheet.
On the <span class="tabLabels">Content</span> tab, you can drag and drop the columns appearing in the column list to reorder the output. The options for reconciled and date data are applied to each column individually.
This exporter is especially useful with reconciled data, as you can choose whether you wish to output the cells' original values, the matched values, or the matched IDs. Outputting “matched entity's name”, “matched entity's ID”, or “cell's content” will output, respectively, the contents of `cell.recon.match.name`, `cell.recon.match.id`, and `cell.value`.
“Output nothing for unmatched cells” will export empty cells for both newly-created matches and cells with no chosen matches. “Link to matched entity's page” will produce hyperlinked text in an HTML table output, but have no effect in other formats.
At this time, the date-formatting options in this window do not work. You can [keep track of this issue on GitHub](https://github.com/OpenRefine/OpenRefine/issues/3368).
In the future, you will also be able to choose how to [output date-formatted cells](exploring#dates). You can create a custom date output by using [formatting according to the SimpleDateFormat parsing key found here](grelfunctions#todateo-b-monthfirst-s-format1-s-format2-).
![A screenshot of the custom tabular file download tab.](/img/custom-tabular-exporter2.png)
On the <span class="tabLabels">Download</span> tab, you can generate a preview of how the first ten rows of your dataset will output. If you do not choose one of the file formats on the right, the <span class="buttonLabels">Download</span> button will generate a text file. On the <span class="tabLabels">Upload</span> tab, you can create a new Google Sheet.
With the <span class="tabLabels">Option Code</span> tab, you can copy JSON of your current settings to reuse on another project, or you can paste in existing JSON settings to apply to the current project.
### SQL exporter
The SQL exporter creates a SQL statement containing the data you've exported, which you can use to overwrite or add to an existing database. Choosing <span class="menuItems">Export</span> → <span class="menuItems">SQL exporter</span> will bring up a window with two tabs: one to define what data to output, and another to modify other aspects of the SQL statement with options to preview and download the statement.
![A screenshot of the SQL statement content window.](/img/sql-exporter.png)
The <span class="tabLabels">Content</span> tab allows you to craft your dataset into an SQL table. From here, you can choose which columns to export, the data type to export for each (or choose "VARCHAR"), and the maximum character length for each field (if applicable based on the data type). You can set a default value for empty cells after unchecking “Allow null” in one or more columns.
With this output tool, you can choose whether to output only currently visible rows, or all the rows in your dataset, as well as whether to include empty rows. Trimming column names will remove their whitespace characters.
![A screenshot of the SQL statement download window.](/img/sql-exporter2.png)
The <span class="tabLabels">Download</span> tab allows you to finalize your complete SQL statement.
<span class="fieldLabels">Include schema</span> means that you will start your statement with the creation of a table. Without that, you will only have an INSERT statement.
<span class="fieldLabels">Include content</span> means the INSERT statement with data from your project. Without that, you will only create empty columns.
You can include DROP and IF EXISTS if you require them, and set a name for the table which the statement will refer to.
You can then preview your statement, which will open up a new browser tab/window showing a statement with the first ten rows of your data (if included), or you can save a `.sql` file to your computer.
### Templating exporter
If you pick <span class="menuItems">Templating…</span> from the <span class="menuItems">Export</span> dropdown menu, you can “roll your own” exporter. This is useful for formats that we don't support natively yet, or won't support. The Templating exporter generates JSON by default.
The window that appears allows you to set your own separators, prefix, and suffix to create a complete dataset in the language of your choice. In the <span class="fieldLabels">Row Template</span> section, you can choose which columns to generate from each row by calling them with variables.
This can be used to:
* output reconciliation data (`cells["column name"].recon.match.name`, `.recon.match.id`, and `.recon.best.name`, for example) instead of cell values
* create multiple columns of output from different member fields of a single project column
* employ GREL expressions to modify cell data for output (for example, `cells["column name"].value.toUppercase()`).
Anything that appears inside doubled curly braces ({{}}) is treated as a GREL expression; anything outside is generated as straight text. You can use Jython or Clojure by declaring it at the start: for example, `{{jython:return cells["Author"].value}}` will run a Jython expression.
:::caution
Note that some syntax is different in this tool than elsewhere in OpenRefine: a forward slash must be escaped with a backslash, while other characters do not need escaping. You cannot, at this time, include a closing curly brace (}) anywhere in your expression, or it will cause it to malfunction.
:::
You can include [regular expressions](expressions#regular-expressions) as usual (inside forward slashes, with any GREL function that accepts them). For example, you could output a version of your cells with punctuation removed, using an expression such as `{{jsonize(cells["Column Name"].value.replaceChars("/[.!?$&,/]/",""))}}`.
You could also simply output a plain-text document inserting data from your project into sentences (for example, "In `{{cells["Year"].value}}` we received `{{cells["RequestCount"].value}}` requests.").
You can use the shorthand `${Column Name}` (no need for quotes) to insert column values directly. You cannot use this inside an expression, because of the closing curly brace.
If your project is in records mode, the <span class="fieldLabels">Row separator</span> field will insert a separator between records, rather than individual rows. Rows inside a single record will be directly appended to one another as per the content in the <span class="fieldLabels">Row Template</span> field.
![A screenshot of the Templating exporter generating JSON by default.](/img/templating-exporter.png)
Once you have created your template, you may wish to save the text you produced in each field, in order to reuse it in the future. Once you click <span class="buttonLabels">Export</span> OpenRefine will output a simple text file, and your template will be discarded.
We have recipes on using the Templating exporter to [produce several different formats](https://github.com/OpenRefine/OpenRefine/wiki/Recipes#12-templating-exporter).
## Export a project
You can share a project in progress with another computer, a colleague, or with someone who wants to check your history. This can be useful for showing that your data cleanup didn't distort or manipulate the information in any way. Once you have exported a project, another OpenRefine installation can [import it as a new project](starting#import-a-project).
:::caution
OpenRefine project archives contain confidential data from previous steps which is still accessible to anyone who has the file. If you are hoping to keep your original dataset hidden for privacy reasons, such as using OpenRefine to anonymize information, do not share your project archive.
:::
* tar.gz only
* Optional rename
* Local or to Google Drive
* Doesnt supply a Google Drive link, just gives a confirmation message
* Other user (or you on another computer) will need to download it and save it locally in order to import it
From the <span class="menuItems">Export</span> dropdown, select <span class="menuItems">OpenRefine project archive to file</span>. OpenRefine exports your full project with all of its history. It does not export any current views or applied facets. Any reconciliation information will be preserved, but the importing installation will need to add the same reconciliation services to keep working with that data.
OpenRefine exports files in `.tar.gz` format. You can rename the file when you save it; otherwise it will bear the project name. You can either save it locally or upload it to Google Drive (which requires you to authorize a Google account), using the <span class="menuItems">OpenRefine project archive to Google Drive...</span> option. OpenRefine will not share the link with you, only confirm that the file was uploaded.
## Export operations
You can [save and re-apply the history of any project](running#reusing-operations) (all the operations shown in the Undo/Redo tab). This creates JSON that you can save for later reuse on another OpenRefine project.

View File

@ -64,7 +64,7 @@ If you want to run only the server side portion of the tests, use:
If you are running the UI tests for the first time, [you must go through the installation process.](functional-tests)
If you want to run only the client side portion of the tests, use:
```shell
yarn --cwd ./main/tests/cypress run cypress open
./refine ui_test chrome
```
## Running

View File

@ -21,12 +21,13 @@ cd ./main/tests/cypress
yarn install
```
Cypress always assumes that OpenRefine is up and running on the local machine, the tests themselves do not launch OpenRefine, nor restarts it.
Cypress tests can be started in two modes:
Once OpenRefine is running, Cypress tests can be started in two modes:
### Development / Debugging mode
Dev mode assumes that OpenRefine is up and running on the local machine; the tests themselves neither launch OpenRefine nor restart it.
Run :
```shell
@ -34,12 +35,15 @@ yarn --cwd ./main/tests/cypress run cypress open
```
It will open the Cypress test runner, where you can choose, replay, visualize tests.
This is the recommended way to run tests when adding or fixing tests
This is the recommended way to run tests when adding or fixing tests.
The runners assumes
### Command-line mode
Command-line mode starts OpenRefine with a temporary folder for data.
```shell
yarn --cwd ./main/tests/cypress run cypress run
./refine ui_test chrome
```
It will run all tests from the command line, without opening any windows, and display the results on standard output.

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

BIN
docs/static/img/sql-exporter.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

BIN
docs/static/img/sql-exporter2.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

BIN
docs/static/img/templating-exporter.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

View File

@ -5262,9 +5262,9 @@ inherits@2.0.3:
integrity sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=
ini@^1.3.5, ini@~1.3.0:
version "1.3.5"
resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.5.tgz#eee25f56db1c9ec6085e0c22778083f596abf927"
integrity sha512-RZY5huIKCMRWDUqZlEi72f/lmXKMvuszcMBduliQ3nnWbx9X/ZBQO7DijMEYS9EhHBb2qacRUMtC7svLwe0lcw==
version "1.3.7"
resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.7.tgz#a09363e1911972ea16d7a8851005d84cf09a9a84"
integrity sha512-iKpRpXP+CrP2jyrxvg1kMUpXDyRUFDWurxbnVT1vQPx+Wz9uCYsMIqYuSBLV+PAaZG/d7kRLKRFc9oDMsH+mFQ==
inline-style-parser@0.1.1:
version "0.1.1"

View File

@ -153,7 +153,7 @@
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.32.3.2</version>
<version>3.34.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
@ -189,7 +189,7 @@
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>3.6.28</version>
<version>3.7.0</version>
<scope>test</scope>
</dependency>

View File

@ -18,12 +18,12 @@
<parameter name = "pgSqlDbPassword" value="Password12!"/>
<parameter name = "pgSqlTestTable" value="test_table"/>
<parameter name = "mariaDbName" value="test_db"/>
<parameter name = "mariaDbHost" value="127.0.0.1"/>
<parameter name = "mariadbDbName" value="test_db"/>
<parameter name = "mariadbDbHost" value="127.0.0.1"/>
<parameter name = "mariadbDbPort" value="3306"/>
<parameter name = "mariaDbUser" value="root"/>
<parameter name = "mariaDbPassword" value=""/>
<parameter name = "mariaDbTestTable" value="test_table"/>
<parameter name = "mariadbDbUser" value="root"/>
<parameter name = "mariadbDbPassword" value=""/>
<parameter name = "mariadbTestTable" value="test_table"/>
<parameter name = "sqliteDbName" value="extension_test_db.sqlite"/>
<parameter name = "sqliteDbHost" value=""/>

View File

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  TestNG suite for the database extension unit tests.

  The values MYSQL_PORT and POSTGRES_PORT below are placeholders, not valid
  port numbers. They presumably have to be replaced with the real service
  ports before the suite is run (TODO: confirm against the CI job that
  consumes this file).
-->
<suite name="DatabaseExtensionUnitTestSuite">
  <!-- MySQL connection settings -->
  <parameter name="mySqlDbName" value="test_db"/>
  <parameter name="mySqlDbHost" value="127.0.0.1"/>
  <parameter name="mySqlDbPort" value="MYSQL_PORT"/>
  <parameter name="mySqlDbUser" value="root"/>
  <parameter name="mySqlDbPassword" value="root"/>
  <parameter name="mySqlTestTable" value="test_table"/>

  <!-- PostgreSQL connection settings (loopback host, like the other services) -->
  <parameter name="pgSqlDbName" value="test_db"/>
  <parameter name="pgSqlDbHost" value="127.0.0.1"/>
  <parameter name="pgSqlDbPort" value="POSTGRES_PORT"/>
  <parameter name="pgSqlDbUser" value="postgres"/>
  <parameter name="pgSqlDbPassword" value="postgres"/>
  <parameter name="pgSqlTestTable" value="test_table"/>

  <!-- MariaDB connection settings; uses the same port placeholder and credentials as MySQL -->
  <parameter name="mariadbDbName" value="test_db"/>
  <parameter name="mariadbDbHost" value="127.0.0.1"/>
  <parameter name="mariadbDbPort" value="MYSQL_PORT"/>
  <parameter name="mariadbDbUser" value="root"/>
  <parameter name="mariadbDbPassword" value="root"/>
  <parameter name="mariadbTestTable" value="test_table"/>

  <!-- SQLite settings: only the database file name and test table carry values -->
  <parameter name="sqliteDbName" value="extension_test_db.sqlite"/>
  <parameter name="sqliteDbHost" value=""/>
  <parameter name="sqliteDbPort" value=""/>
  <parameter name="sqliteDbUser" value=""/>
  <parameter name="sqliteDbPassword" value=""/>
  <parameter name="sqliteDbTestTable" value="test_table"/>

  <test name="DatabaseExtensionUnitTest">
    <!-- The run element is intentionally left empty: no group include/exclude filters are declared -->
    <groups>
      <run>
      </run>
    </groups>
    <classes>
      <class name="com.google.refine.extension.database.DatabaseImportControllerTest"/>
      <class name="com.google.refine.extension.database.DatabaseServiceTest"/>
      <class name="com.google.refine.extension.database.SimpleTextEncryptorTest"/>
      <class name="com.google.refine.extension.database.cmd.ConnectCommandTest"/>
      <class name="com.google.refine.extension.database.cmd.ExecuteQueryCommandTest"/>
      <class name="com.google.refine.extension.database.cmd.SavedConnectionCommandTest"/>
      <class name="com.google.refine.extension.database.cmd.TestConnectCommandTest"/>
      <class name="com.google.refine.extension.database.cmd.TestQueryCommandTest"/>
      <class name="com.google.refine.extension.database.mariadb.MariaDBConnectionManagerTest"/>
      <class name="com.google.refine.extension.database.mariadb.MariaDBDatabaseServiceTest"/>
      <class name="com.google.refine.extension.database.mysql.MySQLConnectionManagerTest"/>
      <class name="com.google.refine.extension.database.mysql.MySQLDatabaseServiceTest"/>
      <class name="com.google.refine.extension.database.pgsql.PgSQLConnectionManagerTest"/>
      <class name="com.google.refine.extension.database.pgsql.PgSQLDatabaseServiceTest"/>
      <class name="com.google.refine.extension.database.sqlite.SQLiteConnectionManagerTest"/>
      <class name="com.google.refine.extension.database.sqlite.SQLiteDatabaseServiceTest"/>
    </classes>
  </test>
</suite>

View File

@ -22,12 +22,12 @@
<parameter name = "pgSqlDbPassword" value=""/>
<parameter name = "pgSqlTestTable" value="test_table"/>
<parameter name = "mariaDbName" value="test_db"/>
<parameter name = "mariaDbHost" value="127.0.0.1"/>
<parameter name = "mariadbDbName" value="test_db"/>
<parameter name = "mariadbDbHost" value="127.0.0.1"/>
<parameter name = "mariadbDbPort" value="3306"/>
<parameter name = "mariaDbUser" value="root"/>
<parameter name = "mariaDbPassword" value=""/>
<parameter name = "mariaDbTestTable" value="test_table"/>
<parameter name = "mariadbDbUser" value="root"/>
<parameter name = "mariadbDbPassword" value=""/>
<parameter name = "mariadbTestTable" value="test_table"/>
<parameter name = "sqliteDbName" value="extension_test_db.sqlite"/>
<parameter name = "sqliteDbHost" value=""/>

View File

@ -1,57 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<suite name="DatabaseExtensionUnitTestSuite">
<parameter name = "mySqlDbName" value="test_db"/>
<parameter name = "mySqlDbHost" value="127.0.0.1"/>
<parameter name = "mySqlDbPort" value="3306"/>
<parameter name = "mySqlDbUser" value="root"/>
<parameter name = "mySqlDbPassword" value=""/>
<parameter name = "mySqlTestTable" value="test_table"/>
<parameter name = "pgSqlDbName" value="test_db"/>
<parameter name = "pgSqlDbHost" value="127.0.0.1"/>
<parameter name = "pgSqlDbPort" value="5432"/>
<parameter name = "pgSqlDbUser" value="postgres"/>
<parameter name = "pgSqlDbPassword" value=""/>
<parameter name = "pgSqlTestTable" value="test_table"/>
<parameter name = "mariaDbName" value="test_db"/>
<parameter name = "mariaDbHost" value="127.0.0.1"/>
<parameter name = "mariadbDbPort" value="3306"/>
<parameter name = "mariaDbUser" value="root"/>
<parameter name = "mariaDbPassword" value=""/>
<parameter name = "mariaDbTestTable" value="test_table"/>
<parameter name = "sqliteDbName" value="extension_test_db.sqlite"/>
<parameter name = "sqliteDbHost" value=""/>
<parameter name = "sqliteDbPort" value=""/>
<parameter name = "sqliteDbUser" value=""/>
<parameter name = "sqliteDbPassword" value=""/>
<parameter name = "sqliteDbTestTable" value="test_table"/>
<test name="DatabaseExtensionUnitTest">
<!-- Temporarily exclude MySQL and MariaDB from Travis tests until MySQL is working on Travis again. -->
<!-- See https://github.com/OpenRefine/OpenRefine/issues/2043 -->
<groups>
<run>
</run>
</groups>
<classes>
<class name="com.google.refine.extension.database.DatabaseImportControllerTest"/>
<class name="com.google.refine.extension.database.DatabaseServiceTest"/>
<class name="com.google.refine.extension.database.SimpleTextEncryptorTest"/>
<class name="com.google.refine.extension.database.cmd.ConnectCommandTest"/>
<class name="com.google.refine.extension.database.cmd.ExecuteQueryCommandTest"/>
<class name="com.google.refine.extension.database.cmd.SavedConnectionCommandTest"/>
<class name="com.google.refine.extension.database.cmd.TestConnectCommandTest"/>
<class name="com.google.refine.extension.database.cmd.TestQueryCommandTest"/>
<class name="com.google.refine.extension.database.mariadb.MariaDBConnectionManagerTest"/>
<class name="com.google.refine.extension.database.mariadb.MariaDBDatabaseServiceTest"/>
<class name="com.google.refine.extension.database.mysql.MySQLConnectionManagerTest"/>
<class name="com.google.refine.extension.database.mysql.MySQLDatabaseServiceTest"/>
<class name="com.google.refine.extension.database.pgsql.PgSQLConnectionManagerTest"/>
<class name="com.google.refine.extension.database.pgsql.PgSQLDatabaseServiceTest" />
<class name="com.google.refine.extension.database.sqlite.SQLiteConnectionManagerTest"/>
<class name="com.google.refine.extension.database.sqlite.SQLiteDatabaseServiceTest" />
</classes>
</test>
</suite>

View File

@ -21,7 +21,7 @@ public class DatabaseTestConfig extends DBExtensionTests {
@BeforeSuite
@Parameters({ "mySqlDbName", "mySqlDbHost", "mySqlDbPort", "mySqlDbUser", "mySqlDbPassword", "mySqlTestTable",
"pgSqlDbName", "pgSqlDbHost", "pgSqlDbPort", "pgSqlDbUser", "pgSqlDbPassword", "pgSqlTestTable",
"mariadbDbName", "mariadbDbHost", "mariadbDbPort", "mariadbyDbUser", "mariadbDbPassword", "mariadbTestTable",
"mariadbDbName", "mariadbDbHost", "mariadbDbPort", "mariadbDbUser", "mariadbDbPassword", "mariadbTestTable",
"sqliteDbName", "sqliteTestTable"})
public void beforeSuite(
@Optional(DEFAULT_MYSQL_DB_NAME) String mySqlDbName, @Optional(DEFAULT_MYSQL_HOST) String mySqlDbHost,
@ -33,7 +33,7 @@ public class DatabaseTestConfig extends DBExtensionTests {
@Optional(DEFAULT_PGSQL_PASSWORD) String pgSqlDbPassword, @Optional(DEFAULT_TEST_TABLE) String pgSqlTestTable,
@Optional(DEFAULT_MARIADB_NAME) String mariadbDbName, @Optional(DEFAULT_MARIADB_HOST) String mariadbDbHost,
@Optional(DEFAULT_MARIADB_PORT) String mariadbDbPort, @Optional(DEFAULT_MARIADB_USER) String mariadbyDbUser,
@Optional(DEFAULT_MARIADB_PORT) String mariadbDbPort, @Optional(DEFAULT_MARIADB_USER) String mariadbDbUser,
@Optional(DEFAULT_MARIADB_PASSWORD) String mariadbDbPassword, @Optional(DEFAULT_TEST_TABLE) String mariadbTestTable,
@Optional(DEFAULT_SQLITE_DB_NAME) String sqliteDbName, @Optional(DEFAULT_TEST_TABLE) String sqliteTestTable)
@ -64,7 +64,7 @@ public class DatabaseTestConfig extends DBExtensionTests {
mariadbDbConfig.setDatabasePassword(mariadbDbPassword);
mariadbDbConfig.setDatabasePort(Integer.parseInt(mariadbDbPort));
mariadbDbConfig.setDatabaseType(MariaDBDatabaseService.DB_NAME);
mariadbDbConfig.setDatabaseUser(mariadbyDbUser);
mariadbDbConfig.setDatabaseUser(mariadbDbUser);
mariadbDbConfig.setUseSSL(false);
sqliteDbConfig = new DatabaseConfiguration();

View File

@ -24,7 +24,7 @@ public class MariaDBConnectionManagerTest extends DBExtensionTests {
@BeforeTest
@Parameters({ "mariaDbName", "mariaDbHost", "mariaDbPort", "mariaDbUser", "mariaDbPassword", "mariaDbTestTable"})
@Parameters({ "mariadbDbName", "mariadbDbHost", "mariadbDbPort", "mariadbDbUser", "mariadbDbPassword", "mariaTestTable"})
public void beforeTest(@Optional(DEFAULT_MARIADB_NAME) String mariaDbName, @Optional(DEFAULT_MARIADB_HOST) String mariaDbHost,
@Optional(DEFAULT_MARIADB_PORT) String mariaDbPort, @Optional(DEFAULT_MARIADB_USER) String mariaDbUser,
@Optional(DEFAULT_MARIADB_PASSWORD) String mariaDbPassword, @Optional(DEFAULT_TEST_TABLE) String mariaDbTestTable) {

View File

@ -30,7 +30,7 @@ public class MariaDBDatabaseServiceTest extends DBExtensionTests{
@BeforeTest
@Parameters({ "mariaDbName", "mariaDbHost", "mariaDbPort", "mariaDbUser", "mariaDbPassword", "mariaDbTestTable"})
@Parameters({ "mariadbDbName", "mariadbDbHost", "mariadbDbPort", "mariadbDbUser", "mariadbDbPassword", "mariadbTestTable"})
public void beforeTest(@Optional(DEFAULT_MARIADB_NAME) String mariaDbName, @Optional(DEFAULT_MARIADB_HOST) String mariaDbHost,
@Optional(DEFAULT_MARIADB_PORT) String mariaDbPort, @Optional(DEFAULT_MARIADB_USER) String mariaDbUser,
@Optional(DEFAULT_MARIADB_PASSWORD) String mariaDbPassword, @Optional(DEFAULT_TEST_TABLE) String mariaDbTestTable) {

View File

@ -151,7 +151,7 @@
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>3.6.28</version>
<version>3.7.0</version>
<scope>test</scope>
</dependency>

View File

@ -290,6 +290,11 @@
<artifactId>clojure</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId>
<version>5.0.3</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
@ -308,7 +313,7 @@
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>2.10.8</version>
<version>2.10.9</version>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
@ -343,7 +348,7 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>30.0-jre</version>
<version>30.1-jre</version>
</dependency>
<dependency>
<groupId>javax.xml.bind</groupId>

View File

@ -48,19 +48,6 @@ import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.http.Consts;
import org.apache.http.NameValuePair;
import org.apache.http.StatusLine;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.client.LaxRedirectStrategy;
import org.apache.http.message.BasicNameValuePair;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
import com.fasterxml.jackson.annotation.JsonProperty;
@ -68,7 +55,6 @@ import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.RefineServlet;
import com.google.refine.commands.Command;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.model.Column;
@ -76,6 +62,7 @@ import com.google.refine.model.Project;
import com.google.refine.model.ReconType;
import com.google.refine.model.Row;
import com.google.refine.model.recon.StandardReconConfig.ReconResult;
import com.google.refine.util.HttpClient;
import com.google.refine.util.ParsingUtilities;
public class GuessTypesOfColumnCommand extends Command {
@ -180,61 +167,38 @@ public class GuessTypesOfColumnCommand extends Command {
}
String queriesString = ParsingUtilities.defaultWriter.writeValueAsString(queryMap);
String responseString;
try {
RequestConfig defaultRequestConfig = RequestConfig.custom()
.setConnectTimeout(30 * 1000)
.build();
responseString = postQueries(serviceUrl, queriesString);
ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(responseString);
HttpClientBuilder httpClientBuilder = HttpClients.custom()
.setUserAgent(RefineServlet.getUserAgent())
.setRedirectStrategy(new LaxRedirectStrategy())
.setDefaultRequestConfig(defaultRequestConfig);
CloseableHttpClient httpClient = httpClientBuilder.build();
HttpPost request = new HttpPost(serviceUrl);
List<NameValuePair> body = Collections.singletonList(
new BasicNameValuePair("queries", queriesString));
request.setEntity(new UrlEncodedFormEntity(body, Consts.UTF_8));
try (CloseableHttpResponse response = httpClient.execute(request)) {
StatusLine statusLine = response.getStatusLine();
if (statusLine.getStatusCode() >= 400) {
throw new IOException("Failed - code:"
+ Integer.toString(statusLine.getStatusCode())
+ " message: " + statusLine.getReasonPhrase());
Iterator<JsonNode> iterator = o.iterator();
while (iterator.hasNext()) {
JsonNode o2 = iterator.next();
if (!(o2.has("result") && o2.get("result") instanceof ArrayNode)) {
continue;
}
String s = ParsingUtilities.inputStreamToString(response.getEntity().getContent());
ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(s);
ArrayNode results = (ArrayNode) o2.get("result");
List<ReconResult> reconResults = ParsingUtilities.mapper.convertValue(results, new TypeReference<List<ReconResult>>() {});
int count = reconResults.size();
Iterator<JsonNode> iterator = o.iterator();
while (iterator.hasNext()) {
JsonNode o2 = iterator.next();
if (!(o2.has("result") && o2.get("result") instanceof ArrayNode)) {
continue;
}
for (int j = 0; j < count; j++) {
ReconResult result = reconResults.get(j);
double score = 1.0 / (1 + j); // score by each result's rank
ArrayNode results = (ArrayNode) o2.get("result");
List<ReconResult> reconResults = ParsingUtilities.mapper.convertValue(results, new TypeReference<List<ReconResult>>() {});
int count = reconResults.size();
List<ReconType> types = result.types;
int typeCount = types.size();
for (int j = 0; j < count; j++) {
ReconResult result = reconResults.get(j);
double score = 1.0 / (1 + j); // score by each result's rank
List<ReconType> types = result.types;
int typeCount = types.size();
for (int t = 0; t < typeCount; t++) {
ReconType type = types.get(t);
double score2 = score * (typeCount - t) / typeCount;
if (map.containsKey(type.id)) {
TypeGroup tg = map.get(type.id);
tg.score += score2;
tg.count++;
} else {
map.put(type.id, new TypeGroup(type.id, type.name, score2));
}
for (int t = 0; t < typeCount; t++) {
ReconType type = types.get(t);
double score2 = score * (typeCount - t) / typeCount;
if (map.containsKey(type.id)) {
TypeGroup tg = map.get(type.id);
tg.score += score2;
tg.count++;
} else {
map.put(type.id, new TypeGroup(type.id, type.name, score2));
}
}
}
@ -259,6 +223,11 @@ public class GuessTypesOfColumnCommand extends Command {
return types;
}
/**
 * Submits the serialized queries to the given service and returns whatever
 * the HTTP helper hands back.
 *
 * A new {@link HttpClient} is created for every call. The payload is passed
 * to {@code postNameValue} under the name {@code "queries"}.
 * NOTE(review): presumably this is sent as a form-encoded POST field and the
 * return value is the response body; confirm against HttpClient.postNameValue.
 *
 * @param serviceUrl    URL of the service to post to
 * @param queriesString serialized queries to send
 * @return the string returned by {@code HttpClient.postNameValue}
 * @throws IOException if the helper reports a failure
 */
private String postQueries(String serviceUrl, String queriesString) throws IOException {
    return new HttpClient().postNameValue(serviceUrl, "queries", queriesString);
}
static protected class TypeGroup {
@JsonProperty("id")
protected String id;

View File

@ -69,19 +69,13 @@ import org.apache.commons.fileupload.ProgressListener;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.apache.commons.fileupload.util.Streams;
import org.apache.http.HttpEntity;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.entity.ContentType;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.apache.http.StatusLine;
import org.apache.hc.client5.http.ClientProtocolException;
import org.apache.hc.core5.http.ClassicHttpResponse;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpStatus;
import org.apache.hc.core5.http.io.HttpClientResponseHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -89,10 +83,10 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.ProjectManager;
import com.google.refine.ProjectMetadata;
import com.google.refine.RefineServlet;
import com.google.refine.importing.ImportingManager.Format;
import com.google.refine.importing.UrlRewriter.Result;
import com.google.refine.model.Project;
import com.google.refine.util.HttpClient;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
import java.util.stream.Collectors;
@ -287,65 +281,56 @@ public class ImportingUtilities {
}
if ("http".equals(url.getProtocol()) || "https".equals(url.getProtocol())) {
HttpClientBuilder clientbuilder = HttpClients.custom()
.setUserAgent(RefineServlet.getUserAgent());
// .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
final URL lastUrl = url;
final HttpClientResponseHandler<String> responseHandler = new HttpClientResponseHandler<String>() {
String userinfo = url.getUserInfo();
// HTTPS only - no sending password in the clear over HTTP
if ("https".equals(url.getProtocol()) && userinfo != null) {
int s = userinfo.indexOf(':');
if (s > 0) {
String user = userinfo.substring(0, s);
String pw = userinfo.substring(s + 1, userinfo.length());
CredentialsProvider credsProvider = new BasicCredentialsProvider();
credsProvider.setCredentials(new AuthScope(url.getHost(), 443),
new UsernamePasswordCredentials(user, pw));
clientbuilder = clientbuilder.setDefaultCredentialsProvider(credsProvider);
}
}
@Override
public String handleResponse(final ClassicHttpResponse response) throws IOException {
final int status = response.getCode();
if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) {
final HttpEntity entity = response.getEntity();
if (entity == null) {
throw new IOException("No content found in " + lastUrl.toExternalForm());
}
CloseableHttpClient httpclient = clientbuilder.build();
HttpGet httpGet = new HttpGet(url.toURI());
CloseableHttpResponse response = httpclient.execute(httpGet);
try {
InputStream stream2 = entity.getContent();
try {
HttpEntity entity = response.getEntity();
if (entity == null) {
throw new Exception("No content found in " + url.toString());
}
StatusLine status = response.getStatusLine();
int statusCode = response.getStatusLine().getStatusCode();
if (statusCode >= 400) {
String errorString = ParsingUtilities.inputStreamToString(entity.getContent());
String message = String.format("HTTP error %d : %s | %s", statusCode,
status.getReasonPhrase(), errorString);
throw new Exception(message);
}
InputStream stream2 = entity.getContent();
String mimeType = null;
String charset = null;
ContentType contentType = ContentType.parse(entity.getContentType());
if (contentType != null) {
mimeType = contentType.getMimeType();
Charset cs = contentType.getCharset();
if (cs != null) {
charset = cs.toString();
}
}
JSONUtilities.safePut(fileRecord, "declaredMimeType", mimeType);
JSONUtilities.safePut(fileRecord, "declaredEncoding", charset);
if (saveStream(stream2, lastUrl, rawDataDir, progress, update,
fileRecord, fileRecords,
entity.getContentLength())) {
return "saved"; // signal to increment archive count
}
String mimeType = null;
String charset = null;
ContentType contentType = ContentType.get(entity);
if (contentType != null) {
mimeType = contentType.getMimeType();
Charset cs = contentType.getCharset();
if (cs != null) {
charset = cs.toString();
} catch (final IOException ex) {
throw new ClientProtocolException(ex);
}
return null;
} else {
// String errorBody = EntityUtils.toString(response.getEntity());
throw new ClientProtocolException(String.format("HTTP error %d : %s for URL %s", status,
response.getReasonPhrase(), lastUrl.toExternalForm()));
}
}
JSONUtilities.safePut(fileRecord, "declaredMimeType", mimeType);
JSONUtilities.safePut(fileRecord, "declaredEncoding", charset);
if (saveStream(stream2, url, rawDataDir, progress, update,
fileRecord, fileRecords,
entity.getContentLength())) {
archiveCount++;
}
downloadCount++;
EntityUtils.consume(entity);
} finally {
httpGet.reset();
}
};
HttpClient httpClient = new HttpClient();
if (httpClient.getResponse(urlString, null, responseHandler) != null) {
archiveCount++;
};
downloadCount++;
} else {
// Fallback handling for non HTTP connections (only FTP?)
URLConnection urlConnection = url.openConnection();
@ -418,7 +403,7 @@ public class ImportingUtilities {
private static boolean saveStream(InputStream stream, URL url, File rawDataDir, final Progress progress,
final SavingUpdate update, ObjectNode fileRecord, ArrayNode fileRecords, long length)
throws IOException, Exception {
throws IOException {
String localname = url.getPath();
if (localname.isEmpty() || localname.endsWith("/")) {
localname = localname + "temp";
@ -436,7 +421,7 @@ public class ImportingUtilities {
long actualLength = saveStreamToFile(stream, file, update);
JSONUtilities.safePut(fileRecord, "size", actualLength);
if (actualLength == 0) {
throw new Exception("No content found in " + url.toString());
throw new IOException("No content found in " + url.toString());
} else if (length >= 0) {
update.totalExpectedSize += (actualLength - length);
} else {

View File

@ -37,30 +37,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.model.recon;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.http.Consts;
import org.apache.http.NameValuePair;
import org.apache.http.StatusLine;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.client.LaxRedirectStrategy;
import org.apache.http.message.BasicNameValuePair;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonInclude.Include;
@ -69,14 +54,15 @@ import com.fasterxml.jackson.annotation.JsonView;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.RefineServlet;
import com.google.refine.expr.functions.ToDate;
import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconType;
import com.google.refine.util.HttpClient;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.JsonViews;
import com.google.refine.util.ParsingUtilities;
public class ReconciledDataExtensionJob {
@ -172,13 +158,21 @@ public class ReconciledDataExtensionJob {
final public List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
// not final: initialized lazily
private static CloseableHttpClient httpClient = null;
private static HttpClient httpClient = null;
public ReconciledDataExtensionJob(DataExtensionConfig obj, String endpoint) {
this.extension = obj;
this.endpoint = endpoint;
}
/**
* @todo Although the HTTP code has been unified, there may still be opportunity
* to refactor a higher level querying library out of this which could be shared
* with StandardReconConfig
*
* It may also be possible to extract a library to query reconciliation services
* which could be used outside of OpenRefine.
*/
public Map<String, ReconciledDataExtensionJob.DataExtension> extend(
Set<String> ids,
Map<String, ReconCandidate> reconCandidateMap
@ -187,7 +181,7 @@ public class ReconciledDataExtensionJob {
formulateQuery(ids, extension, writer);
String query = writer.toString();
String response = performQuery(this.endpoint, query);
String response = postExtendQuery(this.endpoint, query);
ObjectNode o = ParsingUtilities.mapper.readValue(response, ObjectNode.class);
@ -218,43 +212,14 @@ public class ReconciledDataExtensionJob {
return map;
}
/**
* @todo this should be refactored to be unified with the HTTP querying code
* from StandardReconConfig. We should ideally extract a library to query
* reconciliation services and expose it as such for others to reuse.
*/
static protected String performQuery(String endpoint, String query) throws IOException {
HttpPost request = new HttpPost(endpoint);
List<NameValuePair> body = Collections.singletonList(
new BasicNameValuePair("extend", query));
request.setEntity(new UrlEncodedFormEntity(body, Consts.UTF_8));
try (CloseableHttpResponse response = getHttpClient().execute(request)) {
StatusLine statusLine = response.getStatusLine();
if (statusLine.getStatusCode() >= 400) {
throw new IOException("Data extension query failed - code: "
+ Integer.toString(statusLine.getStatusCode())
+ " message: " + statusLine.getReasonPhrase());
} else {
return ParsingUtilities.inputStreamToString(response.getEntity().getContent());
}
}
/**
 * Posts a data extension query to the given endpoint using the shared,
 * lazily created HTTP helper.
 *
 * The query is passed to {@code postNameValue} under the name {@code "extend"}.
 * NOTE(review): presumably the return value is the response body; confirm
 * against HttpClient.postNameValue.
 *
 * @param endpoint URL of the service to query
 * @param query    serialized extension query
 * @return the string returned by {@code HttpClient.postNameValue}
 * @throws IOException if the helper reports a failure
 */
static protected String postExtendQuery(String endpoint, String query) throws IOException {
    final HttpClient client = getHttpClient();
    return client.postNameValue(endpoint, "extend", query);
}
private static CloseableHttpClient getHttpClient() {
if (httpClient != null) {
return httpClient;
private static HttpClient getHttpClient() {
if (httpClient == null) {
httpClient = new HttpClient();
}
RequestConfig defaultRequestConfig = RequestConfig.custom()
.setConnectTimeout(30 * 1000)
.build();
HttpClientBuilder httpClientBuilder = HttpClients.custom()
.setUserAgent(RefineServlet.getUserAgent())
.setRedirectStrategy(new LaxRedirectStrategy())
.setDefaultRequestConfig(defaultRequestConfig);
httpClient = httpClientBuilder.build();
return httpClient;
}

View File

@ -45,18 +45,6 @@ import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.http.Consts;
import org.apache.http.NameValuePair;
import org.apache.http.StatusLine;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.client.LaxRedirectStrategy;
import org.apache.http.message.BasicNameValuePair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -69,7 +57,6 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.RefineServlet;
import com.google.refine.expr.ExpressionUtils;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
@ -79,6 +66,7 @@ import com.google.refine.model.ReconCandidate;
import com.google.refine.model.ReconType;
import com.google.refine.model.RecordModel.RowDependency;
import com.google.refine.model.Row;
import com.google.refine.util.HttpClient;
import com.google.refine.util.ParsingUtilities;
public class StandardReconConfig extends ReconConfig {
@ -164,7 +152,7 @@ public class StandardReconConfig extends ReconConfig {
final private int limit;
// initialized lazily
private CloseableHttpClient httpClient = null;
private HttpClient httpClient = null;
@JsonCreator
public StandardReconConfig(
@ -434,29 +422,29 @@ public class StandardReconConfig extends ReconConfig {
try {
job.code = ParsingUtilities.defaultWriter.writeValueAsString(query);
} catch (JsonProcessingException e) {
// FIXME: This error will get lost
e.printStackTrace();
return null; // TODO: Throw exception instead?
}
return job;
}
private CloseableHttpClient getHttpClient() {
if (httpClient != null) {
return httpClient;
private HttpClient getHttpClient() {
if (httpClient == null) {
httpClient = new HttpClient();
}
RequestConfig defaultRequestConfig = RequestConfig.custom()
.setConnectTimeout(30 * 1000)
.setSocketTimeout(60 * 1000)
.build();
HttpClientBuilder httpClientBuilder = HttpClients.custom()
.setUserAgent(RefineServlet.getUserAgent())
.setRedirectStrategy(new LaxRedirectStrategy())
.setDefaultRequestConfig(defaultRequestConfig);
httpClient = httpClientBuilder.build();
return httpClient;
}
/**
 * Posts a batch of queries to the given URL through this config's HTTP helper.
 *
 * Any {@link IOException} raised by the post is rethrown as a new
 * {@link IOException} whose message includes the full query payload and whose
 * cause is the original exception, so the failing batch can be identified.
 *
 * @param url           URL of the service to post to
 * @param queriesString serialized batch of queries, sent under the name "queries"
 * @return the string returned by {@code HttpClient.postNameValue}
 * @throws IOException if the post fails; the original exception is the cause
 */
private String postQueries(String url, String queriesString) throws IOException {
    final HttpClient client = getHttpClient();
    try {
        return client.postNameValue(url, "queries", queriesString);
    } catch (IOException cause) {
        final String message = "Failed to batch recon with load:\n" + queriesString;
        throw new IOException(message, cause);
    }
}
@Override
public List<Recon> batchRecon(List<ReconJob> jobs, long historyEntryID) {
List<Recon> recons = new ArrayList<Recon>(jobs.size());
@ -475,51 +463,41 @@ public class StandardReconConfig extends ReconConfig {
stringWriter.write("}");
String queriesString = stringWriter.toString();
HttpPost request = new HttpPost(service);
List<NameValuePair> body = Collections.singletonList(
new BasicNameValuePair("queries", queriesString));
request.setEntity(new UrlEncodedFormEntity(body, Consts.UTF_8));
try {
String responseString = postQueries(service, queriesString);
ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(responseString);
try (CloseableHttpResponse response = getHttpClient().execute(request)) {
StatusLine statusLine = response.getStatusLine();
if (statusLine.getStatusCode() >= 400) {
logger.error("Failed - code: "
+ Integer.toString(statusLine.getStatusCode())
+ " message: " + statusLine.getReasonPhrase());
if (o == null) { // utility method returns null instead of throwing
logger.error("Failed to parse string as JSON: " + responseString);
} else {
String s = ParsingUtilities.inputStreamToString(response.getEntity().getContent());
ObjectNode o = ParsingUtilities.evaluateJsonStringToObjectNode(s);
if (o == null) { // utility method returns null instead of throwing
logger.error("Failed to parse string as JSON: " + s);
} else {
for (int i = 0; i < jobs.size(); i++) {
StandardReconJob job = (StandardReconJob) jobs.get(i);
Recon recon = null;
for (int i = 0; i < jobs.size(); i++) {
StandardReconJob job = (StandardReconJob) jobs.get(i);
Recon recon = null;
String text = job.text;
String key = "q" + i;
if (o.has(key) && o.get(key) instanceof ObjectNode) {
ObjectNode o2 = (ObjectNode) o.get(key);
if (o2.has("result") && o2.get("result") instanceof ArrayNode) {
ArrayNode results = (ArrayNode) o2.get("result");
String text = job.text;
String key = "q" + i;
if (o.has(key) && o.get(key) instanceof ObjectNode) {
ObjectNode o2 = (ObjectNode) o.get(key);
if (o2.has("result") && o2.get("result") instanceof ArrayNode) {
ArrayNode results = (ArrayNode) o2.get("result");
recon = createReconServiceResults(text, results, historyEntryID);
} else {
logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
}
recon = createReconServiceResults(text, results, historyEntryID);
} else {
// TODO: better error reporting
logger.warn("Service error for text: " + text + "\n Job code: " + job.code);
logger.warn("Service error for text: " + text + "\n Job code: " + job.code + "\n Response: " + o2.toString());
}
if (recon != null) {
recon.service = service;
}
recons.add(recon);
} else {
// TODO: better error reporting
logger.warn("Service error for text: " + text + "\n Job code: " + job.code);
}
if (recon != null) {
recon.service = service;
}
recons.add(recon);
}
}
} catch (Exception e) {
} catch (IOException e) {
logger.error("Failed to batch recon with load:\n" + queriesString, e);
}
@ -543,7 +521,7 @@ public class StandardReconConfig extends ReconConfig {
return recon;
}
protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) throws IOException {
protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) {
Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace);
List<ReconResult> results = ParsingUtilities.mapper.convertValue(resultsList, new TypeReference<List<ReconResult>>() {});

View File

@ -37,27 +37,13 @@ import static com.google.common.base.Strings.isNullOrEmpty;
import java.io.IOException;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.StatusLine;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.entity.ContentType;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;
import org.apache.hc.core5.http.Header;
import org.apache.hc.core5.http.message.BasicHeader;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
@ -65,7 +51,6 @@ import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.refine.RefineServlet;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.EngineConfig;
import com.google.refine.browsing.FilteredRows;
@ -86,7 +71,7 @@ import com.google.refine.operations.EngineDependentOperation;
import com.google.refine.operations.OnError;
import com.google.refine.process.LongRunningProcess;
import com.google.refine.process.Process;
import com.google.refine.util.ParsingUtilities;
import com.google.refine.util.HttpClient;
public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation {
@ -117,8 +102,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
final protected boolean _cacheResponses;
final protected List<HttpHeader> _httpHeadersJson;
private Header[] httpHeaders = new Header[0];
final private RequestConfig defaultRequestConfig;
private HttpClientBuilder httpClientBuilder;
private HttpClient _httpClient;
@JsonCreator
public ColumnAdditionByFetchingURLsOperation(
@ -163,22 +148,8 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
}
}
httpHeaders = headers.toArray(httpHeaders);
_httpClient = new HttpClient(_delay);
defaultRequestConfig = RequestConfig.custom()
.setConnectTimeout(30 * 1000)
.setConnectionRequestTimeout(30 * 1000)
.setSocketTimeout(10 * 1000).build();
// TODO: Placeholder for future Basic Auth implementation
// CredentialsProvider credsProvider = new BasicCredentialsProvider();
// credsProvider.setCredentials(new AuthScope(host, 443),
// new UsernamePasswordCredentials(user, password));
httpClientBuilder = HttpClients.custom()
.setUserAgent(RefineServlet.getUserAgent())
.setDefaultRequestConfig(defaultRequestConfig);
// .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
// .setDefaultCredentialsProvider(credsProvider);
}
@JsonProperty("newColumnName")
@ -281,20 +252,7 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
.build(
new CacheLoader<String, Serializable>() {
public Serializable load(String urlString) throws Exception {
Serializable result = fetch(urlString);
try {
// Always sleep for the delay, no matter how long the
// request took. This is more responsible than substracting
// the time spend requesting the URL, because it naturally
// slows us down if the server is busy and takes a long time
// to reply.
if (_delay > 0) {
Thread.sleep(_delay);
}
} catch (InterruptedException e) {
result = null;
}
Serializable result = fetch(urlString, httpHeaders);
if (result == null) {
// the load method should not return any null value
throw new Exception("null result returned by fetch");
@ -335,9 +293,9 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
Serializable response = null;
if (_urlCache != null) {
response = cachedFetch(urlString); // TODO: Why does this need a separate method?
response = cachedFetch(urlString);
} else {
response = fetch(urlString);
response = fetch(urlString, httpHeaders);
}
if (response != null) {
@ -380,68 +338,19 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
}
}
Serializable fetch(String urlString) {
HttpGet httpGet;
try {
// Use of URL constructor below is purely to get additional error checking to mimic
// previous behavior for the tests.
httpGet = new HttpGet(new URL(urlString).toURI());
} catch (IllegalArgumentException | MalformedURLException | URISyntaxException e) {
return null;
}
try {
httpGet.setHeaders(httpHeaders);
httpGet.setConfig(defaultRequestConfig);
CloseableHttpClient httpclient = httpClientBuilder.build();
CloseableHttpResponse response = null;
Serializable fetch(String urlString, Header[] headers) {
try { //HttpClients.createDefault()) {
try {
response = httpclient.execute(httpGet);
HttpEntity entity = response.getEntity();
if (entity == null) {
throw new Exception("No content found in " + httpGet.getURI().toString());
}
String encoding = null;
if (entity.getContentEncoding() != null) {
encoding = entity.getContentEncoding().getValue();
} else {
Charset charset = ContentType.getOrDefault(entity).getCharset();
if (charset != null) {
encoding = charset.name();
}
}
String result = ParsingUtilities.inputStreamToString(
entity.getContent(), (encoding == null) || ( encoding.equalsIgnoreCase("\"UTF-8\"")) ? "UTF-8" : encoding);
EntityUtils.consume(entity);
return result;
return _httpClient.getAsString(urlString, headers);
} catch (IOException e) {
String message;
if (response == null) {
message = "Unknown HTTP error " + e.getLocalizedMessage();
} else {
StatusLine status = response.getStatusLine();
HttpEntity errorEntity = response.getEntity();
String errorString = ParsingUtilities.inputStreamToString(errorEntity.getContent());
message = String.format("HTTP error %d : %s | %s", status.getStatusCode(),
status.getReasonPhrase(),
errorString);
}
return _onError == OnError.StoreError ? new EvalError(message) : null;
return _onError == OnError.StoreError ? new EvalError(e) : null;
}
} catch (Exception e) {
return _onError == OnError.StoreError ? new EvalError(e.getMessage()) : null;
}
}
RowVisitor createRowVisitor(List<CellAtRow> cellsAtRows) {
return new RowVisitor() {
int cellIndex;
@ -497,4 +406,5 @@ public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperat
}.init(cellsAtRows);
}
}
}

View File

@ -0,0 +1,208 @@
package com.google.refine.util;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.hc.client5.http.ClientProtocolException;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.config.RequestConfig;
import org.apache.hc.client5.http.entity.UrlEncodedFormEntity;
import org.apache.hc.client5.http.impl.DefaultHttpRequestRetryStrategy;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
import org.apache.hc.core5.http.ClassicHttpResponse;
import org.apache.hc.core5.http.EntityDetails;
import org.apache.hc.core5.http.Header;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpException;
import org.apache.hc.core5.http.HttpRequest;
import org.apache.hc.core5.http.HttpRequestInterceptor;
import org.apache.hc.core5.http.HttpResponse;
import org.apache.hc.core5.http.HttpStatus;
import org.apache.hc.core5.http.NameValuePair;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.HttpClientResponseHandler;
import org.apache.hc.core5.http.io.SocketConfig;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.message.BasicNameValuePair;
import org.apache.hc.core5.http.protocol.HttpContext;
import org.apache.hc.core5.util.TimeValue;
import com.google.refine.RefineServlet;
/**
 * Small wrapper around Apache HttpClient 5 shared by the operations that need
 * to talk to remote services. A single instance owns one pooled
 * {@link CloseableHttpClient} configured with:
 * <ul>
 * <li>the OpenRefine user agent string,</li>
 * <li>a 10 second socket timeout and 30 second connect / connection-request
 * timeouts,</li>
 * <li>up to 3 retries using binary exponential backoff (unless the server
 * supplies its own retry interval),</li>
 * <li>a request interceptor that spaces successive requests by at least the
 * configured delay.</li>
 * </ul>
 */
public class HttpClient {
    final private RequestConfig defaultRequestConfig;
    private HttpClientBuilder httpClientBuilder;
    private CloseableHttpClient httpClient;
    // Minimum spacing between requests, and base interval for retries, in milliseconds
    private int _delay;

    /**
     * Creates a client with no delay between requests.
     */
    public HttpClient() {
        this(0);
    }

    /**
     * Creates a client which waits between requests.
     *
     * @param delay
     *            minimum time in milliseconds between two successive requests;
     *            also used as the base interval of the exponential backoff
     *            retry strategy
     */
    public HttpClient(int delay) {
        _delay = delay;
        // Create a connection manager with a custom socket timeout
        PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager();
        final SocketConfig socketConfig = SocketConfig.custom()
                .setSoTimeout(10, TimeUnit.SECONDS)
                .build();
        connManager.setDefaultSocketConfig(socketConfig);

        defaultRequestConfig = RequestConfig.custom()
                .setConnectTimeout(30, TimeUnit.SECONDS)
                .setConnectionRequestTimeout(30, TimeUnit.SECONDS) // TODO: 60 seconds in some places in old code
                .build();

        httpClientBuilder = HttpClients.custom()
                .setUserAgent(RefineServlet.getUserAgent())
                .setDefaultRequestConfig(defaultRequestConfig)
                .setConnectionManager(connManager)
                // Default Apache HC retry is 1x @1 sec (or the value in Retry-Header)
                .setRetryStrategy(new ExponentialBackoffRetryStrategy(3, TimeValue.ofMilliseconds(_delay)))
//               .setRedirectStrategy(new LaxRedirectStrategy()) // TODO: No longer needed since default doesn't exclude POST?
//               .setConnectionBackoffStrategy(ConnectionBackoffStrategy)
                .addRequestInterceptorFirst(new HttpRequestInterceptor() {

                    // Earliest wall-clock time (ms) at which the next request may be sent.
                    // NOTE(review): not synchronized - assumes requests on one client are
                    // issued from a single thread at a time; confirm against callers.
                    private long nextRequestTime = System.currentTimeMillis();

                    @Override
                    public void process(
                            final HttpRequest request,
                            final EntityDetails entity,
                            final HttpContext context) throws HttpException, IOException {

                        long waitMillis = nextRequestTime - System.currentTimeMillis();
                        if (waitMillis > 0) {
                            try {
                                Thread.sleep(waitMillis);
                            } catch (InterruptedException e) {
                                // Don't swallow the interruption: restore the flag so the
                                // caller can observe the cancellation, and abort this request
                                // instead of sending it early.
                                Thread.currentThread().interrupt();
                                throw new java.io.InterruptedIOException(
                                        "Interrupted while waiting to send request");
                            }
                        }
                        nextRequestTime = System.currentTimeMillis() + _delay;
                    }
                });

        // TODO: Placeholder for future Basic Auth implementation
//        String userinfo = url.getUserInfo();
//        // HTTPS only - no sending password in the clear over HTTP
//        if ("https".equals(url.getProtocol()) && userinfo != null) {
//            int s = userinfo.indexOf(':');
//            if (s > 0) {
//                String user = userinfo.substring(0, s);
//                String pw = userinfo.substring(s + 1, userinfo.length());
//                CredentialsProvider credsProvider = new BasicCredentialsProvider();
//                credsProvider.setCredentials(new AuthScope(url.getHost(), 443),
//                        new UsernamePasswordCredentials(user, pw.toCharArray()));
//                httpClientBuilder = httpClientBuilder.setDefaultCredentialsProvider(credsProvider);
//            }
//        }

        httpClient = httpClientBuilder.build();
    }

    /**
     * Fetches a URL with GET and returns the response body as a string.
     *
     * @param urlString
     *            the URL to fetch
     * @param headers
     *            extra request headers, may be null or empty
     * @return the response body, or null if the URL is not syntactically valid
     * @throws IOException
     *             if the request fails, the response has no body, or the
     *             status code is outside the 2xx range
     */
    public String getAsString(String urlString, Header[] headers) throws IOException {

        final HttpClientResponseHandler<String> responseHandler = new HttpClientResponseHandler<String>() {

            @Override
            public String handleResponse(final ClassicHttpResponse response) throws IOException {
                final int status = response.getCode();
                if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) {
                    final HttpEntity entity = response.getEntity();
                    if (entity == null) {
                        throw new IOException("No content found in " + urlString);
                    }
                    try {
                        // Honor the charset declared by the server, but fall back to UTF-8
                        // (rather than the library default of ISO-8859-1) when none is given.
                        return EntityUtils.toString(entity, StandardCharsets.UTF_8);
                    } catch (final ParseException ex) {
                        throw new ClientProtocolException(ex);
                    }
                } else {
                    // String errorBody = EntityUtils.toString(response.getEntity());
                    throw new ClientProtocolException(String.format("HTTP error %d : %s for URL %s", status,
                            response.getReasonPhrase(), urlString));
                }
            }
        };

        return getResponse(urlString, headers, responseHandler);
    }

    /**
     * Executes a GET request and hands the response to the given handler.
     *
     * @param urlString
     *            the URL to fetch
     * @param headers
     *            extra request headers, may be null or empty
     * @param responseHandler
     *            converts the HTTP response into the returned string
     * @return whatever the handler returns, or null if the URL is malformed
     * @throws IOException
     *             if executing the request or handling the response fails
     */
    public String getResponse(String urlString, Header[] headers, HttpClientResponseHandler<String> responseHandler) throws IOException {
        try {
            // Use of URL constructor below is purely to get additional error checking to mimic
            // previous behavior for the tests.
            new URL(urlString).toURI();
        } catch (IllegalArgumentException | MalformedURLException | URISyntaxException e) {
            return null;
        }

        HttpGet httpGet = new HttpGet(urlString);

        if (headers != null && headers.length > 0) {
            httpGet.setHeaders(headers);
        }
        httpGet.setConfig(defaultRequestConfig); // FIXME: Redundant? already includes in client builder
        return httpClient.execute(httpGet, responseHandler);
    }

    /**
     * Posts a single form field (URL-encoded, UTF-8) and returns the response
     * body.
     *
     * @param serviceUrl
     *            the URL to post to
     * @param name
     *            the form field name
     * @param value
     *            the form field value
     * @return the response body as a string
     * @throws IOException
     *             if the request fails, the status code is 400 or above, or
     *             the response carries no body
     */
    public String postNameValue(String serviceUrl, String name, String value) throws IOException {
        HttpPost request = new HttpPost(serviceUrl);
        List<NameValuePair> body = Collections.singletonList(
                new BasicNameValuePair(name, value));
        request.setEntity(new UrlEncodedFormEntity(body, StandardCharsets.UTF_8));

        try (CloseableHttpResponse response = httpClient.execute(request)) {
            String reasonPhrase = response.getReasonPhrase();
            int statusCode = response.getCode();
            if (statusCode >= 400) { // We should never see 3xx since they get handled automatically
                throw new IOException(String.format("HTTP error %d : %s for URL %s", statusCode, reasonPhrase,
                        request.getRequestUri()));
            }

            // A response without a body would otherwise fail with a NullPointerException
            final HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("No content found in " + serviceUrl);
            }
            return ParsingUtilities.inputStreamToString(entity.getContent());
        }
    }

    /**
     * Use binary exponential backoff strategy, instead of the default fixed
     * retry interval, if the server doesn't provide a Retry-After time.
     */
    class ExponentialBackoffRetryStrategy extends DefaultHttpRequestRetryStrategy {

        private final TimeValue defaultInterval;

        /**
         * @param maxRetries
         *            maximum number of retries per request
         * @param defaultRetryInterval
         *            base interval which is doubled on each successive retry
         */
        public ExponentialBackoffRetryStrategy(final int maxRetries, final TimeValue defaultRetryInterval) {
            super(maxRetries, defaultRetryInterval);
            this.defaultInterval = defaultRetryInterval;
        }

        @Override
        public TimeValue getRetryInterval(HttpResponse response, int execCount, HttpContext context) {
            // Get the default implementation's interval
            TimeValue interval = super.getRetryInterval(response, execCount, context);
            // If it's the same as the default, there was no Retry-After, so use binary
            // exponential backoff: base * 2^execCount
            if (interval.compareTo(defaultInterval) == 0) {
                long backoff = (long) (Math.pow(2, execCount) * defaultInterval.getDuration());
                interval = TimeValue.of(backoff, defaultInterval.getTimeUnit());
            }
            return interval;
        }
    }
}

View File

@ -1,13 +1,6 @@
# OpenRefine test suite
# OpenRefine UI test suite
## Install
Please refer to the official OpenRefine documentation
```
cd ./main/tests/e2e
npm install
```
## Usage
- Run OpenRefine on a separate terminal
- Open the Cypress test runner with `./node_modules/.bin/cypress open`
- [How to build, test and run](https://docs.openrefine.org/technical-reference/build-test-run/)
- [Functional tests](https://docs.openrefine.org/technical-reference/functional-tests)

View File

@ -2,7 +2,7 @@
"integrationFolder": "./cypress/integration",
"nodeVersion": "system",
"retries": {
"runMode": 1,
"runMode": 2,
"openMode": 1
},
"env":{

View File

@ -88,7 +88,7 @@ describe(__filename, function () {
cy.get('.dialog-container').should('exist').should('be.visible');
cy.get('.dialog-container button[bind="closeButton"]').click();
cy.get('.dialog-container').should('not.be.visible');
cy.get('.dialog-container').should('not.exist');
});
it('Ensure action are recorded in the extract panel', function () {

View File

@ -3,8 +3,8 @@ describe(__filename, function () {
cy.loadAndVisitProject('food.mini.csv');
cy.deleteColumn('NDB_No');
cy.get('#notification-container').should('be.visible').contains('Remove column NDB_No');
cy.get('#notification-container .notification-action').should('be.visible').contains('Undo');
cy.get('#notification-container').should('be.visible').should('to.contain', 'Remove column NDB_No');
cy.get('#notification-container .notification-action').should('be.visible').should('to.contain', 'Undo');
});
it('Ensure the Undo button is effectively working', function () {
@ -12,7 +12,8 @@ describe(__filename, function () {
cy.deleteColumn('NDB_No');
// ensure that the column is back in the grid
cy.get('#notification-container .notification-action').should('be.visible').contains('Undo').click();
cy.get('#notification-container .notification-action').should('be.visible').should('to.contain', 'Undo');
cy.get('#notification-container a[bind="undoLink"]').click();
cy.get('.data-table th[title="NDB_No"]').should('exist');
});
@ -21,39 +22,39 @@ describe(__filename, function () {
// delete NDB_No
cy.deleteColumn('NDB_No');
cy.get('#or-proj-undoRedo').contains('1 / 1');
cy.get('.history-panel-body .history-now').contains('Remove column NDB_No');
cy.get('#or-proj-undoRedo').should('to.contain', '1 / 1');
cy.get('.history-panel-body .history-now').should('to.contain', 'Remove column NDB_No');
// delete Water
cy.deleteColumn('Water');
cy.get('#or-proj-undoRedo').contains('2 / 2');
cy.get('.history-panel-body .history-now').contains('Remove column Water');
cy.get('#or-proj-undoRedo').should('to.contain', '2 / 2');
cy.get('.history-panel-body .history-now').should('to.contain', 'Remove column Water');
// Delete Shrt_Desc
cy.deleteColumn('Shrt_Desc');
cy.get('#or-proj-undoRedo').contains('3 / 3');
cy.get('.history-panel-body .history-now').contains('Remove column Shrt_Desc');
cy.get('#or-proj-undoRedo').should('to.contain', '3 / 3');
cy.get('.history-panel-body .history-now').should('to.contain', 'Remove column Shrt_Desc');
// Open the Undo/Redo panel
cy.get('#or-proj-undoRedo').click();
// ensure all previous actions have been recorded
cy.get('.history-panel-body .history-past a.history-entry:nth-of-type(2)').contains('Remove column NDB_No');
cy.get('.history-panel-body .history-past a.history-entry:nth-of-type(3)').contains('Remove column Water');
cy.get('.history-panel-body .history-now').contains('Remove column Shrt_Desc');
cy.get('.history-panel-body .history-past').should('to.contain', 'Remove column NDB_No');
cy.get('.history-panel-body .history-past').should('to.contain', 'Remove column Water');
cy.get('.history-panel-body .history-now').should('to.contain', 'Remove column Shrt_Desc');
// successively undo all modifications
cy.get('.history-panel-body .history-past a.history-entry:last-of-type').click();
cy.waitForOrOperation();
cy.get('.history-panel-body .history-past').contains('Remove column NDB_No');
cy.get('.history-panel-body .history-now').contains('Remove column Water');
cy.get('.history-panel-body .history-future').contains('Remove column Shrt_Desc');
cy.get('.history-panel-body .history-past').should('to.contain', 'Remove column NDB_No');
cy.get('.history-panel-body .history-now').should('to.contain', 'Remove column Water');
cy.get('.history-panel-body .history-future').should('to.contain', 'Remove column Shrt_Desc');
cy.get('.history-panel-body .history-past a.history-entry:last-of-type').click();
cy.waitForOrOperation();
cy.get('.history-panel-body .history-now').contains('Remove column NDB_No');
cy.get('.history-panel-body .history-future').contains('Remove column Water');
cy.get('.history-panel-body .history-future').contains('Remove column Shrt_Desc');
cy.get('.history-panel-body .history-now').should('to.contain', 'Remove column NDB_No');
cy.get('.history-panel-body .history-future').should('to.contain', 'Remove column Water');
cy.get('.history-panel-body .history-future').should('to.contain', 'Remove column Shrt_Desc');
});
// Very long test to run

View File

@ -67,7 +67,7 @@ describe(__filename, function () {
// cypress does not support window.location = ...
cy.get('h2').contains('HTTP ERROR 404');
cy.location().should((location) => {
expect(location.href).contains('http://localhost:3333/__/project?');
expect(location.href).contains(Cypress.env('OPENREFINE_URL')+'/__/project?');
});
cy.location().then((location) => {

View File

@ -41,7 +41,7 @@ Cypress.Commands.add('doCreateProjectThroughUserInterface', () => {
// cypress does not support window.location = ...
cy.get('h2').contains('HTTP ERROR 404');
cy.location().should((location) => {
expect(location.href).contains('http://localhost:3333/__/project?');
expect(location.href).contains(Cypress.env('OPENREFINE_URL')+'/__/project?');
});
cy.location().then((location) => {
@ -67,7 +67,9 @@ Cypress.Commands.add('assertCellEquals', (rowIndex, columnName, value) => {
cy.get(`table.data-table thead th[title="${columnName}"]`).then(($elem) => {
// there are 3 td at the beginning of each row
const columnIndex = $elem.index() + 3;
cy.get(`table.data-table tbody tr:nth-child(${cssRowIndex}) td:nth-child(${columnIndex}) div`).contains(value, { timeout: 5000 });
cy.get(`table.data-table tbody tr:nth-child(${cssRowIndex}) td:nth-child(${columnIndex}) div.data-table-cell-content > span`).should(($cellSpan)=>{
expect($cellSpan.text()).equals(value);
});
});
});
@ -92,7 +94,7 @@ Cypress.Commands.add('waitForDialogPanel', () => {
Cypress.Commands.add('confirmDialogPanel', () => {
cy.get('body > .dialog-container > .dialog-frame .dialog-footer button[bind="okButton"]').click();
cy.get('body > .dialog-container > .dialog-frame').should('not.be.visible');
cy.get('body > .dialog-container > .dialog-frame').should('not.exist');
});
Cypress.Commands.add('columnActionClick', (columnName, actions) => {

View File

@ -37,7 +37,7 @@ afterEach(() => {
});
before(() => {
cy.request('http://127.0.0.1:3333/command/core/get-csrf-token').then((response) => {
cy.request(Cypress.env('OPENREFINE_URL')+'/command/core/get-csrf-token').then((response) => {
// store one unique token for block of runs
token = response.body.token;
});

View File

@ -1,8 +1,9 @@
Cypress.Commands.add('setPreference', (preferenceName, preferenceValue) => {
cy.request(Cypress.env('OPENREFINE_URL') + '/command/core/get-csrf-token').then((response) => {
const openRefineUrl = Cypress.env('OPENREFINE_URL')
cy.request( openRefineUrl + '/command/core/get-csrf-token').then((response) => {
cy.request({
method: 'POST',
url: `http://127.0.0.1:3333/command/core/set-preference`,
url: `${openRefineUrl}/command/core/set-preference`,
body: `name=${preferenceName}&value="${preferenceValue}"&csrf_token=${response.body.token}`,
form: false,
headers: {
@ -15,12 +16,13 @@ Cypress.Commands.add('setPreference', (preferenceName, preferenceValue) => {
});
Cypress.Commands.add('cleanupProjects', () => {
const openRefineUrl = Cypress.env('OPENREFINE_URL')
cy.get('@deletetoken', { log: false }).then((token) => {
cy.get('@loadedProjectIds', { log: false }).then((loadedProjectIds) => {
for (const projectId of loadedProjectIds) {
cy.request({
method: 'POST',
url: `http://127.0.0.1:3333/command/core/delete-project?csrf_token=` + token,
url: `${openRefineUrl}/command/core/delete-project?csrf_token=` + token,
body: { project: projectId },
form: true,
}).then((resp) => {
@ -32,6 +34,7 @@ Cypress.Commands.add('cleanupProjects', () => {
});
Cypress.Commands.add('loadProject', (fixture, projectName) => {
const openRefineUrl = Cypress.env('OPENREFINE_URL');
const openRefineProjectName = projectName ? projectName : fixture;
cy.fixture(fixture).then((content) => {
cy.get('@token', { log: false }).then((token) => {
@ -54,7 +57,7 @@ Cypress.Commands.add('loadProject', (fixture, projectName) => {
cy.request({
method: 'POST',
url: `http://127.0.0.1:3333/command/core/create-project-from-upload?csrf_token=` + token,
url: `${openRefineUrl}/command/core/create-project-from-upload?csrf_token=` + token,
body: postData,
headers: {
'content-type': 'multipart/form-data; boundary=----BOUNDARY',

View File

@ -1,16 +1,16 @@
{
"name":"OpenRefine-Cypress-Test-Suite",
"version":"1.0.0",
"description":"Cypress tests for OpenRefine",
"license":"BSD-3-Clause",
"author":"OpenRefine",
"private":true,
"dependencies":{
"cypress":"5.6.0",
"cypress-file-upload":"^4.1.1",
"cypress-wait-until":"^1.7.1",
"dotenv":"^8.2.0",
"fs-extra":"^9.0.1",
"uniqid":"^5.2.0"
}
"name": "openrefine-cypress-test-suite",
"version": "1.0.0",
"description": "Cypress tests for OpenRefine",
"license": "BSD-3-Clause",
"author": "OpenRefine",
"private": true,
"dependencies": {
"cypress": "6.0.1",
"cypress-file-upload": "^4.1.1",
"cypress-wait-until": "^1.7.1",
"dotenv": "^8.2.0",
"fs-extra": "^9.0.1",
"uniqid": "^5.2.0"
}
}

View File

@ -377,10 +377,10 @@ cypress-wait-until@^1.7.1:
resolved "https://registry.yarnpkg.com/cypress-wait-until/-/cypress-wait-until-1.7.1.tgz#3789cd18affdbb848e3cfc1f918353c7ba1de6f8"
integrity sha512-8DL5IsBTbAxBjfYgCzdbohPq/bY+IKc63fxtso1C8RWhLnQkZbVESyaclNr76jyxfId6uyzX8+Xnt0ZwaXNtkA==
cypress@5.6.0:
version "5.6.0"
resolved "https://registry.yarnpkg.com/cypress/-/cypress-5.6.0.tgz#6781755c3ddfd644ce3179fcd7389176c0c82280"
integrity sha512-cs5vG3E2JLldAc16+5yQxaVRLLqMVya5RlrfPWkC72S5xrlHFdw7ovxPb61s4wYweROKTyH01WQc2PFzwwVvyQ==
cypress@6.0.0:
version "6.0.0"
resolved "https://registry.yarnpkg.com/cypress/-/cypress-6.0.0.tgz#57050773c61e8fe1e5c9871cc034c616fcacded9"
integrity sha512-A/w9S15xGxX5UVeAQZacKBqaA0Uqlae9e5WMrehehAdFiLOZj08IgSVZOV8YqA9OH9Z0iBOnmsEkK3NNj43VrA==
dependencies:
"@cypress/listr-verbose-renderer" "^0.4.1"
"@cypress/request" "^2.88.5"

View File

@ -29,6 +29,7 @@ package com.google.refine.importing;
import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import java.io.File;
import java.io.IOException;
@ -98,8 +99,6 @@ public class ImportingUtilitiesTests extends ImporterTest {
public void urlImporting() throws IOException {
String RESPONSE_BODY = "{code:401,message:Unauthorised}";
String MESSAGE = String.format("HTTP error %d : %s | %s", 401,
"Client Error", RESPONSE_BODY);
MockWebServer server = new MockWebServer();
MockResponse mockResponse = new MockResponse();
@ -108,6 +107,8 @@ public class ImportingUtilitiesTests extends ImporterTest {
server.start();
server.enqueue(mockResponse);
HttpUrl url = server.url("/random");
String MESSAGE = String.format("HTTP error %d : %s for URL %s", 401,
"Client Error", url);
MultipartEntityBuilder builder = MultipartEntityBuilder.create();
StringBody stringBody = new StringBody(url.toString(), ContentType.MULTIPART_FORM_DATA);
@ -145,9 +146,9 @@ public class ImportingUtilitiesTests extends ImporterTest {
return job.canceled;
}
});
Assert.fail("No Exception was thrown");
fail("No Exception was thrown");
} catch (Exception exception) {
Assert.assertEquals(MESSAGE, exception.getMessage());
assertEquals(exception.getMessage(), MESSAGE);
} finally {
server.close();
}

View File

@ -91,7 +91,7 @@ public class StandardReconConfigTests extends RefineTest {
return wordDistance(s1, s2);
}
protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) throws IOException {
protected Recon createReconServiceResults(String text, ArrayNode resultsList, long historyEntryID) {
return super.createReconServiceResults(text, resultsList, historyEntryID);
}
}

View File

@ -33,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.refine.operations.column;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@ -129,7 +132,7 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
} catch (InterruptedException e) {
Assert.fail("Test interrupted");
}
Assert.assertFalse(process.isRunning());
Assert.assertFalse(process.isRunning(),"Process failed to complete within timeout " + timeout);
}
@Test
@ -273,4 +276,102 @@ public class ColumnAdditionByFetchingURLsOperationTests extends RefineTest {
}
}
/**
 * Checks that the URL-fetching operation honours the Retry-After header.
 *
 * The mock server is loaded with five HTTP 429 responses, each carrying a
 * Retry-After value of 1, followed by a single successful response. The
 * operation is then run over two rows; the test passes when the run takes
 * longer than 4 seconds, the first row ends up with an "HTTP error 429"
 * message and the second row ends up with the "success" body.
 *
 * @throws Exception if the mock server or the operation fails unexpectedly
 */
@Test
public void testRetries() throws Exception {
    try (MockWebServer server = new MockWebServer()) {
        server.start();
        HttpUrl url = server.url("/retries");

        // Two input rows holding "test1" and "test2" in their first cell
        for (int rowNumber = 1; rowNumber <= 2; rowNumber++) {
            Row row = new Row(2);
            row.setCell(0, new Cell("test" + rowNumber, null));
            project.rows.add(row);
        }

        // Five throttling responses, each asking the client to wait 1 sec. before retrying
        for (int sequence = 0; sequence < 5; sequence++) {
            MockResponse throttled = new MockResponse()
                    .setHeader("Retry-After", 1)
                    .setResponseCode(429)
                    .setBody(String.valueOf(sequence));
            server.enqueue(throttled);
        }

        // ...followed by the one response that finally succeeds
        MockResponse ok = new MockResponse().setBody("success");
        server.enqueue(ok);

        String urlExpression = "\"" + url + "?city=\"+value";
        EngineDependentOperation op = new ColumnAdditionByFetchingURLsOperation(
                engine_config,
                "fruits",
                urlExpression,
                OnError.StoreError,
                "rand",
                1,
                100,
                false,
                null);

        // Six queued responses in total, four of which trigger a 1 sec. retry wait
        long start = System.currentTimeMillis();
        runAndWait(op, 4500);
        long elapsed = System.currentTimeMillis() - start;

        // Obeying Retry-After means 4 * 1 sec. of waiting rather than 4 * 100 msec.
        assertTrue(elapsed > 4000, "Retry-After retries didn't take long enough - elapsed = " + elapsed);

        // Row 1 gives up after 4 tries (3 retries); row 2 succeeds on its second try
        String firstRowResult = project.rows.get(0).getCellValue(1).toString();
        assertTrue(firstRowResult.contains("HTTP error 429"), "missing 429 error");

        String secondRowResult = project.rows.get(1).getCellValue(1).toString();
        assertEquals(secondRowResult, "success");

        server.shutdown();
    }
}
/**
 * Checks the retry back-off used when the server gives no Retry-After hint.
 *
 * The mock server is loaded with five HTTP 503 responses (no Retry-After
 * header), then a successful response, then an HTTP 404 response. The
 * operation is run over three rows; the test passes when the run takes
 * longer than 1600 msec., the first row holds an "HTTP error 503" message,
 * the second row holds the "success" body and the third row holds an
 * "HTTP error 404" message.
 *
 * @throws Exception if the mock server or the operation fails unexpectedly
 */
@Test
public void testExponentialRetries() throws Exception {
    try (MockWebServer server = new MockWebServer()) {
        server.start();
        HttpUrl url = server.url("/retries");

        // Three input rows holding "test1" .. "test3" in their first cell
        for (int rowNumber = 1; rowNumber <= 3; rowNumber++) {
            Row row = new Row(2);
            row.setCell(0, new Cell("test" + rowNumber, null));
            project.rows.add(row);
        }

        // Five 503 Server Unavailable responses, deliberately without a Retry-After header
        for (int sequence = 0; sequence < 5; sequence++) {
            MockResponse unavailable = new MockResponse()
                    .setResponseCode(503)
                    .setBody(String.valueOf(sequence));
            server.enqueue(unavailable);
        }

        // One success, then a 404 for the last row
        MockResponse ok = new MockResponse().setBody("success");
        server.enqueue(ok);
        MockResponse notFound = new MockResponse().setBody("not found").setResponseCode(404);
        server.enqueue(notFound);

        String urlExpression = "\"" + url + "?city=\"+value";
        ColumnAdditionByFetchingURLsOperation op = new ColumnAdditionByFetchingURLsOperation(
                engine_config,
                "fruits",
                urlExpression,
                OnError.StoreError,
                "rand",
                1,
                100,
                false,
                null);

        // Six requests with four retries (200, 400, 800, 200 msec.), then the final response
        long start = System.currentTimeMillis();
        runAndWait(op, 2500);
        long elapsed = System.currentTimeMillis() - start;

        // The back-off waits listed above add up to 1600 msec.
        assertTrue(elapsed > 1600, "Exponential retries didn't take enough time - elapsed = " + elapsed);

        // Row 1 gives up after 4 tries (3 retries); row 2 succeeds on its second try;
        // row 3 hits a hard error
        String firstRowResult = project.rows.get(0).getCellValue(1).toString();
        assertTrue(firstRowResult.contains("HTTP error 503"), "Missing 503 error");

        String secondRowResult = project.rows.get(1).getCellValue(1).toString();
        assertEquals(secondRowResult, "success");

        String thirdRowResult = project.rows.get(2).getCellValue(1).toString();
        assertTrue(thirdRowResult.contains("HTTP error 404"), "Missing 404 error");

        server.shutdown();
    }
}
}

View File

@ -38,9 +38,7 @@ import static org.mockito.Mockito.mock;
import static org.powermock.api.mockito.PowerMockito.mockStatic;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@ -49,7 +47,6 @@ import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.powermock.api.mockito.PowerMockito;
@ -225,7 +222,6 @@ public class ExtendDataOperationTests extends RefineTest {
* Test to fetch simple strings
* @throws Exception
*/
@BeforeMethod
public void mockHttpCalls() throws Exception {
mockStatic(ReconciledDataExtensionJob.class);
@ -236,7 +232,7 @@ public class ExtendDataOperationTests extends RefineTest {
return fakeHttpCall(invocation.getArgument(0), invocation.getArgument(1));
}
};
PowerMockito.doAnswer(mockedResponse).when(ReconciledDataExtensionJob.class, "performQuery", anyString(), anyString());
PowerMockito.doAnswer(mockedResponse).when(ReconciledDataExtensionJob.class, "postExtendQuery", anyString(), anyString());
}
@AfterMethod

View File

@ -244,6 +244,7 @@
"core-dialogs/sql-exporter": "SQL Exporter",
"core-dialogs/custom-tab-exp": "Custom Tabular Exporter",
"core-dialogs/select-columns-dialog": "Select columns",
"core-dialogs/unstar-expression": "Unstar expression?",
"core-dialogs/content": "Content",
"core-dialogs/download": "Download",
"core-dialogs/upload": "Upload",

View File

@ -163,7 +163,7 @@
"core-dialogs/idling": "בהמתנה…",
"core-dialogs/updating": "מתבצע עדכון…",
"core-dialogs/scatterplot-matrix": "טבלת פיזור",
"core-dialogs/focusing-on": "התמקדות ב",
"core-dialogs/focusing-on": "התמקדות על",
"core-dialogs/processing": "מתבצע עיבוד…",
"core-dialogs/error-getColumnInfo": "שגיאה בביצוע 'get-columns-info'",
"core-dialogs/no-column-dataset": "אין טורם בצביר נתונים זה",
@ -171,12 +171,12 @@
"core-dialogs/logarithmic-plot": "הצגה לוגריטמית",
"core-dialogs/rotated-counter-clock": "הוסב ב-45 מעלות בניגוד ללכיוון השעון",
"core-dialogs/no-rotation": "ללא סיבוב",
"core-dialogs/rotated-clock": "הוסב ב-45 מעלות עם כיוון השעון",
"core-dialogs/rotated-clock": "הוסב ב־45 מעלות עם כיוון השעון",
"core-dialogs/small-dot": "גודל הנקודה הקטנה",
"core-dialogs/regular-dot": "גודל נקודה רגילה",
"core-dialogs/big-dot": "גודל נקודה גדולה",
"core-dialogs/cell-fields": "בתא הנוכחי, יש כמה שדות: 'value', 'recon' and 'errorMessage'.",
"core-dialogs/cell-value": "ערך התא הנוכחי. זהו קיצור דרך עבור 'cell.value'.",
"core-dialogs/cell-fields": "בתא הנוכחי, יש מספר שדות: value, recon ו־errorMessage.",
"core-dialogs/cell-value": "ערך התא הנוכחי. זהו קיצור דרך עבור cell.value.",
"core-dialogs/row-fields": "בשורה הנוכחית, יש 5 שדות: 'flagged', 'starred', 'index', 'cells', 'record'.",
"core-dialogs/cells-of-row": "התאים של השורה הנוכחית. זהו קיצור דרך ל־row.cells. אפשר להחזיר תא מסוים בעזרת cells.<column name> אם <column name> היא מילה בודדת, או עם cells[\"<column name>\"] אם יותר.",
"core-dialogs/row-index": "אינדקס השורה הנוכחית. זהו קיצור דרך ל- 'row.index'.",
@ -697,5 +697,6 @@
"core-views/blank-records": "רשומות ריקות לפי עמודה",
"core-views/blank-values": "ערכים ריקים לפי עמודה",
"core-views/blank-rows": "שורות ריקות",
"core-views/goto-page": "$1 מתוך {{plural:$2|דף אחד|$2 דפים}}"
"core-views/goto-page": "$1 מתוך {{plural:$2|דף אחד|$2 דפים}}",
"core-dialogs/focusing-on-column": " (התמקדות על $1)"
}

View File

@ -192,7 +192,7 @@
"core-dialogs/use-this-val": "この値を使う",
"core-dialogs/cells-of-row": "現在の行のcells: row.cellsの別名です。特定のセルは、cells.<column name>かcells[column name]でアクセスできます.",
"core-dialogs/for-include-drop-statement-checkbox": "Drop文を含める",
"core-dialogs/ngram-radius": "ngram半径;",
"core-dialogs/ngram-radius": "半径&nbsp;",
"core-dialogs/processing": "処理中…",
"core-dialogs/row-index": "現在の行のindexrow.indexの別名",
"core-dialogs/ignore-facets": "ファセットやフィルタを無視し、すべての行を出力",

View File

@ -350,15 +350,30 @@ ExpressionPreviewDialog.Widget.prototype._renderStarredExpressions = function(da
var o = Scripting.parse(entry.code);
$('<a href="javascript:{}">'+$.i18n('core-dialogs/remove')+'</a>').appendTo(tr.insertCell(0)).click(function() {
Refine.postCSRF(
"command/core/toggle-starred-expression",
{ expression: entry.code, returnList: true },
function(data) {
self._renderStarredExpressions(data);
self._renderExpressionHistoryTab();
},
"json"
);
var removeExpression = DialogSystem.createDialog();
removeExpression.width("250px");
var removeExpressionHead = $('<div></div>').addClass("dialog-header").text($.i18n('core-dialogs/unstar-expression'))
.appendTo(removeExpression);
var removeExpressionFooter = $('<div></div>').addClass("dialog-footer").appendTo(removeExpression);
$('<button class="button"></button>').html($.i18n('core-buttons/ok')).click(function() {
Refine.postCSRF(
"command/core/toggle-starred-expression",
{ expression: entry.code, returnList: true },
function(data) {
self._renderStarredExpressions(data);
self._renderExpressionHistoryTab();
},
"json"
);
DialogSystem.dismissUntil(DialogSystem._layers.length - 1);
}).appendTo(removeExpressionFooter);
$('<button class="button" style="float:right;"></button>').text($.i18n('core-buttons/cancel')).click(function() {
DialogSystem.dismissUntil(DialogSystem._layers.length - 1);
}).appendTo(removeExpressionFooter);
this._level = DialogSystem.showDialog(removeExpression);
});
$('<a href="javascript:{}">Reuse</a>').appendTo(tr.insertCell(1)).click(function() {

56
refine
View File

@ -64,7 +64,7 @@ and <action> is one of
test ................................ Run all OpenRefine tests
server_test ......................... Run only the server tests
ui_test ............................. Run only the UI tests
ui_test <browser> <id> <key> ........ Run only the UI tests (If passing a project Id and a Record Key, tests will be recorded in Cypress.io Dashboard)
extensions_test ..................... Run only the extensions tests
broker .............................. Run OpenRefine Broker
@ -480,15 +480,31 @@ test() {
}
ui_test() {
INTERACTIVE=$1
get_revision
windmill_prepare
BROWSER="$1"
CYPRESS_PROJECT_ID="$2"
CYPRESS_RECORD_KEY="$3"
CYPRESS_RECORD=0
if [ -z "$BROWSER" ] ; then
BROWSER="electron"
fi
if [ ! -z "$CYPRESS_PROJECT_ID" ] && [ ! -z "$CYPRESS_RECORD_KEY" ] ; then
CYPRESS_RECORD=1
echo "Tests will be recorded in Cypress Dashboard"
elif [ ! -z "$CYPRESS_PROJECT_ID" ] && [ -z "$CYPRESS_RECORD_KEY" ] ; then
fail "Found a Cypress project id but no record key"
fi
REFINE_DATA_DIR="${TMPDIR:=/tmp}/openrefine-tests"
add_option "-Drefine.headless=true"
add_option "-Drefine.autoreload=false"
add_option "-Dbutterfly.autoreload=false"
run fork
run fork > /dev/null
echo "Waiting for OpenRefine to load..."
sleep 5
@ -499,15 +515,25 @@ ui_test() {
echo "... proceed with the tests."
echo ""
load_data "$REFINE_TEST_DIR/data/food.csv" "Food"
sleep 3
echo ""
echo "Starting Cypress..."
CYPRESS_RUN_CMD="yarn --cwd ./main/tests/cypress run cypress run --browser $BROWSER --headless --quiet --reporter list --env OPENREFINE_URL=http://$REFINE_HOST:$REFINE_PORT"
if [ "$CYPRESS_RECORD" = "1" ] ; then
# if tests are recorded, project id is added to env vars, and --record flag is added to the cmd-line
export CYPRESS_PROJECT_ID=$CYPRESS_PROJECT_ID
CYPRESS_RUN_CMD="$CYPRESS_RUN_CMD --record --key $CYPRESS_RECORD_KEY --tag $BROWSER,$REVISION"
fi
export MOZ_FORCE_DISABLE_E10S=1
echo $CYPRESS_RUN_CMD
$CYPRESS_RUN_CMD
echo "Starting Windmill..."
if [ -z "$INTERACTIVE" ] ; then
"$WINDMILL" firefox firebug loglevel=WARN http://${REFINE_HOST}:${REFINE_PORT}/ jsdir=$REFINE_TEST_DIR/client/src exit
if [ "$?" = "0" ] ; then
UI_TEST_SUCCESS="1"
else
"$WINDMILL" firefox firebug loglevel=WARN http://${REFINE_HOST}:${REFINE_PORT}/
UI_TEST_SUCCESS="0"
fi
if [ "$CYPRESS_RECORD" = "1" ] ; then
echo "You can review tests on Cypress.io: https://dashboard.cypress.io/projects/$CYPRESS_PROJECT_ID/runs"
fi
echo ""
@ -515,6 +541,10 @@ ui_test() {
/bin/kill -9 $REFINE_PID
echo "Cleaning up"
rm -rf "$REFINE_DATA_DIR"
if [ "$UI_TEST_SUCCESS" = "0" ] ; then
error "The UI test suite failed."
fi
}
server_test() {
@ -926,8 +956,8 @@ case "$ACTION" in
distclean) mvn distclean;;
test) test $1;;
tests) test $1;;
ui_test) ui_test $1;;
ui_tests) ui_test $1;;
ui_test) ui_test $1 $2 $3;;
ui_tests) ui_test $1 $2 $3;;
server_test) server_test $1;;
server_tests) server_test $1;;
extensions_test) extensions_test $1;;