diff --git a/refine b/refine index a981f945f..6ff9298da 100755 --- a/refine +++ b/refine @@ -34,8 +34,11 @@ where [options] include: -p the port that OpenRefine will listen to default: 3333 - -i the host interface OpenRefine should bind to + -i the network interface OpenRefine should bind to default: 127.0.0.1 + + -H the expected value for the Host header (set to * to disable checks) + default: -w path to the webapp default: main/webapp @@ -151,7 +154,7 @@ check_pywin32() { check_running() { check_downloaders - URL="http://${REFINE_HOST}:${REFINE_PORT}/" + URL="http://${REFINE_HOST_INTERNAL}:${REFINE_PORT}/" CHECK_STR="OpenRefine" if [ "$CURL" ] ; then @@ -215,7 +218,7 @@ get_revision() { elif [ -d ".git" ] ; then INFO=`git describe` REVISION=`echo $INFO` - REVISION=${REVISION:4} + REVISION=${REVISION:4} else error "cannot obtain revision, exiting!" fi @@ -258,7 +261,7 @@ tool_download() { load_data() { FILE=$1 NAME=$2 - URL="http://${REFINE_HOST}:${REFINE_PORT}/command/core/create-project-from-upload" + URL="http://${REFINE_HOST_INTERNAL}:${REFINE_PORT}/command/core/create-project-from-upload" CURL="`which curl 2> /dev/null`" if [ -z "$CURL" ] ; then @@ -272,11 +275,11 @@ load_data() { display() { FILE=$1 if [ "$OS" = "macosx" ] ; then - open $FILE + open $FILE elif [ "$OS" = "linux" ] ; then - gnome-open $FILE + gnome-open $FILE else - notepad $FILE + notepad $FILE fi } @@ -516,7 +519,7 @@ ui_test() { echo "" echo "Starting Cypress..." 
- CYPRESS_RUN_CMD="yarn --cwd ./main/tests/cypress run cypress run --browser $BROWSER --headless --quiet --reporter list --env OPENREFINE_URL=http://$REFINE_HOST:$REFINE_PORT" + CYPRESS_RUN_CMD="yarn --cwd ./main/tests/cypress run cypress run --browser $BROWSER --headless --quiet --reporter list --env OPENREFINE_URL=http://$REFINE_HOST_INTERNAL:$REFINE_PORT" if [ "$CYPRESS_RECORD" = "1" ] ; then # if tests are recorded, project id is added to env vars, and --record flag is added to the cmd-line export CYPRESS_PROJECT_ID=$CYPRESS_PROJECT_ID @@ -596,6 +599,10 @@ run() { add_option "-Drefine.port=$REFINE_PORT" fi + if [ "$REFINE_INTERFACE" ] ; then + add_option "-Drefine.interface=$REFINE_INTERFACE" + fi + if [ "$REFINE_HOST" ] ; then add_option "-Drefine.host=$REFINE_HOST" fi @@ -609,7 +616,7 @@ run() { RUN_CMD=("$JAVA" -cp "$CLASSPATH" "${OPTS[@]}" "com.google.refine.Refine") echo "${RUN_CMD[@]}" - echo "Starting OpenRefine at 'http://${REFINE_HOST}:${REFINE_PORT}/'" + echo "Starting OpenRefine at 'http://${REFINE_HOST_INTERNAL}:${REFINE_PORT}/'" echo "" if [ -z "$FORK" ] ; then @@ -654,7 +661,7 @@ broker_run() { #echo "$RUN_CMD" #echo "" - echo "Starting OpenRefine Broker at 'http://${REFINE_HOST}:${REFINE_PORT}/'" + echo "Starting OpenRefine Broker at 'http://0.0.0.0:${REFINE_PORT}/'" echo "" if [ -z "$FORK" ] ; then @@ -802,8 +809,8 @@ fi if [ "$OS" = "macosx" ] ; then if [ -z "$JAVA_HOME" ] ; then - # We need want recent Java because we're bundling JRE - may want to warn and force developer to set JAVA_HOME - # The /usr/libexec/java_home utility may be tied to the Java prefs app, so could go away when Apple removes it + # We need want recent Java because we're bundling JRE - may want to warn and force developer to set JAVA_HOME + # The /usr/libexec/java_home utility may be tied to the Java prefs app, so could go away when Apple removes it export JAVA_HOME=$(/usr/libexec/java_home) fi fi @@ -824,9 +831,10 @@ checkJavaMajorVersion while [ $# -ne 0 ] ; do case "$1" 
in - -h) usage;; + -h) usage;; -p) shift; REFINE_PORT="$1"; shift; continue;; - -i) shift; REFINE_HOST="$1"; shift; continue;; + -H) shift; REFINE_HOST="$1"; shift; continue;; + -i) shift; REFINE_INTERFACE="$1"; shift; continue;; -w) shift; REFINE_WEBAPP="$1"; shift; continue;; -d) shift; REFINE_DATA_DIR="$1"; shift; continue;; -m) @@ -873,9 +881,9 @@ add_option "-Xms$REFINE_MIN_MEMORY" "-Xmx$REFINE_MEMORY" "-Drefine.memory=$REFIN freeRam=UNKNOWN if [ "$OS" = "macosx" ] ; then - freeRam=$(top -l 1 | grep PhysMem | awk '{print $6}' | tr -d M) + freeRam=$(top -l 1 | grep PhysMem | awk '{print $6}' | tr -d M) elif [ "$OS" = "linux" ] ; then - freeRam=$(free -m | grep -oP '\d+' | head -n 1) + freeRam=$(free -m | grep -oP '\d+' | head -n 1) fi echo You have "$freeRam"M of free memory. @@ -892,8 +900,23 @@ if [ -z "$REFINE_PORT" ] ; then REFINE_PORT="3333" fi +if [ -z "$REFINE_INTERFACE" ] ; then + REFINE_INTERFACE="127.0.0.1" +fi + if [ -z "$REFINE_HOST" ] ; then - REFINE_HOST="127.0.0.1" + if [ "$REFINE_INTERFACE" = "0.0.0.0" ] ; then + REFINE_HOST='*' + else + REFINE_HOST="$REFINE_INTERFACE" + fi +fi + +if [ "$REFINE_HOST" = '*' ] ; then + echo No host specified while binding to interface 0.0.0.0, guessing localhost. + REFINE_HOST_INTERNAL="localhost" +else + REFINE_HOST_INTERNAL="$REFINE_HOST" fi if [ -z "$REFINE_WEBAPP" ] ; then diff --git a/refine.bat b/refine.bat index 6d594d9c3..b720b4f60 100644 --- a/refine.bat +++ b/refine.bat @@ -1,5 +1,4 @@ -rem Changing this for debugging on Appveyor -rem @echo off +@echo off rem rem Configuration variables rem @@ -29,9 +28,12 @@ echo. echo "/p " the port that OpenRefine will listen to echo default: 3333 echo. -echo "/i " the host interface OpenRefine should bind to +echo "/i " the network interface OpenRefine should bind to echo default: 127.0.0.1 echo. +echo "/H " the expected value for the Host header (set to * to disable checks) +echo default: ^ +echo. echo "/w " path to the webapp echo default src\main\webapp echo. 
@@ -99,10 +101,11 @@ rem --- Argument parsing -------------------------------------------- :loop if ""%1"" == """" goto endArgumentParsing -if ""%1"" == ""/h"" goto usage if ""%1"" == ""/?"" goto usage +if ""%1"" == ""/h"" goto usage if ""%1"" == ""/p"" goto arg-p if ""%1"" == ""/i"" goto arg-i +if ""%1"" == ""/H"" goto arg-H if ""%1"" == ""/w"" goto arg-w if ""%1"" == ""/d"" goto arg-d if ""%1"" == ""/m"" goto arg-m @@ -114,6 +117,10 @@ set REFINE_PORT=%2 goto shift2loop :arg-i +set REFINE_INTERFACE=%2 +goto shift2loop + +:arg-H set REFINE_HOST=%2 goto shift2loop @@ -165,10 +172,17 @@ set REFINE_PORT=3333 :gotPort set OPTS=%OPTS% -Drefine.port=%REFINE_PORT% +if not "%REFINE_INTERFACE%" == "" goto gotInterface +set REFINE_INTERFACE=127.0.0.1 +:gotInterface +set OPTS=%OPTS% -Drefine.interface=%REFINE_INTERFACE% + if not "%REFINE_HOST%" == "" goto gotHost -set REFINE_HOST=127.0.0.1 +if "%REFINE_INTERFACE%" == "" goto skipHost +set REFINE_HOST=%REFINE_INTERFACE% :gotHost set OPTS=%OPTS% -Drefine.host=%REFINE_HOST% +:skipHost if not "%REFINE_WEBAPP%" == "" goto gotWebApp set REFINE_WEBAPP=main\webapp diff --git a/server/src/com/google/refine/Refine.java b/server/src/com/google/refine/Refine.java index 4436934e6..0a524055d 100644 --- a/server/src/com/google/refine/Refine.java +++ b/server/src/com/google/refine/Refine.java @@ -75,11 +75,12 @@ import com.google.refine.Configurations; */ public class Refine { - static private final String DEFAULT_HOST = "127.0.0.1"; + static private final String DEFAULT_IFACE = "127.0.0.1"; static private final int DEFAULT_PORT = 3333; static private int port; static private String host; + static private String iface; final static Logger logger = LoggerFactory.getLogger("refine"); @@ -100,8 +101,12 @@ public class Refine { // set the log verbosity level org.apache.log4j.Logger.getRootLogger().setLevel(Level.toLevel(Configurations.get("refine.verbosity","info"))); - port = Configurations.getInteger("refine.port",DEFAULT_PORT); - host = 
Configurations.get("refine.host",DEFAULT_HOST); + port = Configurations.getInteger("refine.port", DEFAULT_PORT); + iface = Configurations.get("refine.interface", DEFAULT_IFACE); + host = Configurations.get("refine.host", iface); + if ("0.0.0.0".equals(host)) { + host = "*"; + } Refine refine = new Refine(); @@ -111,7 +116,7 @@ public class Refine { public void init(String[] args) throws Exception { RefineServer server = new RefineServer(); - server.init(host,port); + server.init(iface, port, host); boolean headless = Configurations.getBoolean("refine.headless",false); if (headless) { @@ -120,7 +125,16 @@ public class Refine { } else { try { RefineClient client = new RefineClient(); - client.init(host,port); + if ("*".equals(host)) { + if ("0.0.0.0".equals(iface)) { + logger.warn("No refine.host specified while binding to interface 0.0.0.0, guessing localhost."); + client.init("localhost",port); + } else { + client.init(iface,port); + } + } else { + client.init(host,port); + } } catch (Exception e) { logger.warn("Sorry, some error prevented us from launching the browser for you.\n\n Point your browser to http://" + host + ":" + port + "/ to start using Refine."); } @@ -155,8 +169,8 @@ class RefineServer extends Server { private ThreadPoolExecutor threadPool; - public void init(String host, int port) throws Exception { - logger.info("Starting Server bound to '" + host + ":" + port + "'"); + public void init(String iface, int port, String host) throws Exception { + logger.info("Starting Server bound to '" + iface + ":" + port + "'"); String memory = Configurations.get("refine.memory"); if (memory != null) { @@ -168,7 +182,7 @@ class RefineServer extends Server { HttpConnectionFactory httpFactory = new HttpConnectionFactory(httpConfig); ServerConnector connector = new ServerConnector(this, httpFactory); connector.setPort(port); - connector.setHost(host); + connector.setHost(iface); 
connector.setIdleTimeout(Configurations.getInteger("server.connection.max_idle_time",60000)); this.addConnector(connector); @@ -192,7 +206,14 @@ class RefineServer extends Server { WebAppContext context = new WebAppContext(webapp.getAbsolutePath(), contextPath); context.setMaxFormContentSize(maxFormContentSize); - this.setHandler(context); + if ("*".equals(host)) { + this.setHandler(context); + } else { + ValidateHostHandler wrapper = new ValidateHostHandler(host); + wrapper.setHandler(context); + this.setHandler(wrapper); + } + this.setStopAtShutdown(true); StatisticsHandler handler = new StatisticsHandler(); handler.setServer(this); @@ -485,12 +506,7 @@ class RefineClient extends JFrame implements ActionListener { private URI uri; public void init(String host, int port) throws Exception { - - String cleanedHost = host; - if("0.0.0.0".equals(host)) { - cleanedHost = "localhost"; - } - uri = new URI("http://" + cleanedHost + ":" + port + "/"); + uri = new URI("http://" + host + ":" + port + "/"); openBrowser(); }
diff --git a/server/src/com/google/refine/ValidateHostHandler.java b/server/src/com/google/refine/ValidateHostHandler.java
new file mode 100644
index 000000000..f41da8a17
--- /dev/null
+++ b/server/src/com/google/refine/ValidateHostHandler.java
@@ -0,0 +1,127 @@
+/*******************************************************************************
+ * Copyright (C) 2020, OpenRefine contributors
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ ******************************************************************************/
+
+package com.google.refine;
+
+import java.io.IOException;
+import java.util.regex.Pattern;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import org.eclipse.jetty.server.handler.HandlerWrapper;
+import org.eclipse.jetty.server.Request;
+
+/**
+ * Validate the Host header of the HTTP request to see if it matches either a loopback IP
+ * address, localhost or an explicitly specified hostname. This is required to avoid DNS
+ * rebinding attacks against users running OpenRefine on their desktop computers.
+ */
+class ValidateHostHandler extends HandlerWrapper {
+
+    /**
+     * Matches:
+     *  - addresses in the 127.0.0.0/8 subnet
+     *  - IPv4-mapped addresses in the ::ffff:7f00:00/104 subnet
+     *  - different representations of ::1
+     *  - localhost
+     * Matching is a little fuzzy to simplify the regular expression - it expects the Host
+     * header to be well-formed. Some invalid addresses would be accepted, for example:
+     *  - 127.6..64.245
+     *  - 0::0:::0:00:1
+     * This is not a problem however, as these are not valid DNS names either, and should
+     * never be sent by a well-behaved browser - and validating the host header only ever
+     * helps if the browser works as expected and cannot be used to fake the Host header.
+     */
+    static private final Pattern LOOPBACK_PATTERN = Pattern
+            .compile("^(?:127\\.[0-9\\.]*|\\[[0\\:]*\\:(?:ffff\\:7f[0-9a-f]{2}:[0-9a-f]{1,4}|0{0,3}1)\\]|localhost)(?:\\:[0-9]+)?$", Pattern.CASE_INSENSITIVE);
+
+    /** Hostname (without port or IPv6 brackets) accepted besides the loopback names. */
+    private String expectedHost;
+
+    /**
+     * @param expectedHost hostname that requests may use in their Host header, compared
+     *                     case-insensitively after the port and IPv6 brackets are stripped
+     */
+    public ValidateHostHandler(String expectedHost) {
+        this.expectedHost = expectedHost;
+    }
+
+    /**
+     * Decides whether a Host header value is acceptable.
+     *
+     * @param host raw value of the Host header, optionally with a port; may be null
+     *             when the client sent no Host header
+     * @return true for loopback addresses, localhost and the expected hostname
+     */
+    public boolean isValidHost(String host) {
+
+        // A request without a Host header cannot be validated, so reject it
+        // instead of failing with a NullPointerException below
+        if (host == null) {
+            return false;
+        }
+
+        // Allow loopback IPv4 and IPv6 addresses, as well as localhost
+        if (LOOPBACK_PATTERN.matcher(host).find()) {
+            return true;
+        }
+
+        // Strip port from hostname - for IPv6 addresses, if
+        // they end with a bracket, then there is no port
+        int index = host.lastIndexOf(':');
+        if (index > 0 && !host.endsWith("]")) {
+            host = host.substring(0, index);
+        }
+
+        // Strip brackets from IPv6 addresses; the end index of substring is
+        // exclusive, so length() - 1 drops exactly the closing bracket
+        if (host.startsWith("[") && host.endsWith("]")) {
+            host = host.substring(1, host.length() - 1);
+        }
+
+        // Allow only if stripped hostname matches expected hostname
+        return expectedHost.equalsIgnoreCase(host);
+    }
+
+    /**
+     * Forwards the request to the wrapped handler when its Host header is valid,
+     * and answers with HTTP 404 otherwise.
+     */
+    @Override
+    public void handle(String target, Request baseRequest, HttpServletRequest request, HttpServletResponse response)
+            throws IOException, ServletException {
+        String host = request.getHeader("Host");
+        if (isValidHost(host)) {
+            super.handle(target, baseRequest, request, response);
+        } else {
+            // Return HTTP 404 Not Found, since we are
+            // not serving content for the requested URL
+            response.sendError(HttpServletResponse.SC_NOT_FOUND, "Invalid hostname");
+        }
+    }
+
+}
\ No newline at end of file