diff --git a/broker/appengine/src/com/google/gridworks/appengine/AppEngineClientConnection.java b/broker/appengine/src/com/google/gridworks/appengine/AppEngineClientConnection.java deleted file mode 100644 index 70012f94e..000000000 --- a/broker/appengine/src/com/google/gridworks/appengine/AppEngineClientConnection.java +++ /dev/null @@ -1,243 +0,0 @@ -package com.google.gridworks.appengine; - -import static com.google.appengine.api.urlfetch.FetchOptions.Builder.allowTruncate; - -import java.io.ByteArrayOutputStream; -import java.net.InetAddress; -import java.net.URL; -import java.util.concurrent.TimeUnit; - -import javax.net.ssl.SSLSession; - -import org.apache.http.Header; -import org.apache.http.HttpConnectionMetrics; -import org.apache.http.HttpHost; -import org.apache.http.HttpResponse; -import org.apache.http.ProtocolVersion; -import org.apache.http.conn.ManagedClientConnection; -import org.apache.http.conn.routing.HttpRoute; -import org.apache.http.entity.ByteArrayEntity; -import org.apache.http.message.BasicHttpResponse; -import org.apache.http.params.HttpParams; -import org.apache.http.protocol.HttpContext; - -import com.google.appengine.api.urlfetch.HTTPHeader; -import com.google.appengine.api.urlfetch.HTTPMethod; -import com.google.appengine.api.urlfetch.HTTPRequest; -import com.google.appengine.api.urlfetch.HTTPResponse; -import com.google.appengine.api.urlfetch.URLFetchService; -import com.google.appengine.api.urlfetch.URLFetchServiceFactory; - -class AppEngineClientConnection implements ManagedClientConnection { - // Managed is the composition of ConnectionReleaseTrigger, - // HttpClientConnection, HttpConnection, HttpInetConnection - - private HttpRoute _route; - private Object _state; - private boolean _reuseable; - - public AppEngineClientConnection(HttpRoute route, Object state) { - _route = route; - _state = state; - } - - // ManagedClientConnection methods - - public HttpRoute getRoute() { - return _route; - } - - public Object getState() { - 
return _state; - } - - public SSLSession getSSLSession() { - return null; - } - - public boolean isSecure() { - // XXX maybe parse the url to see if it's https? - return false; - } - - public boolean isMarkedReusable() { - return _reuseable; - } - - public void markReusable() { - _reuseable = true; - } - - public void layerProtocol(HttpContext context, HttpParams params) { - return; - } - - public void open(HttpRoute route, HttpContext context, HttpParams params) { - return; - } - - public void setIdleDuration(long duration, TimeUnit unit) { - return; - } - - public void setState(Object state) { - _state = state; - } - - public void tunnelProxy(HttpHost next, boolean secure, HttpParams params) { - return; - } - - public void tunnelTarget(boolean secure, HttpParams params) { - return; - } - - public void unmarkReusable() { - _reuseable = false; - } - - - // ConnectionReleaseTrigger methods - - public void releaseConnection() { - return; - } - - public void abortConnection() { - return; - } - - // HttpClientConnection methods - - private HTTPRequest _appengine_hrequest; - private HTTPResponse _appengine_hresponse; - - public void flush() { - return; - } - - public boolean isResponseAvailable(int timeout) { - // XXX possibly use Async fetcher - return true; - } - - public void receiveResponseEntity(org.apache.http.HttpResponse apache_response) { - byte[] data = _appengine_hresponse.getContent(); - - if (data != null) { - apache_response.setEntity(new ByteArrayEntity(data)); - } - } - - public HttpResponse receiveResponseHeader() { - URLFetchService ufs = URLFetchServiceFactory.getURLFetchService(); - try { - _appengine_hresponse = ufs.fetch(_appengine_hrequest); - } catch (java.io.IOException e) { - throw new RuntimeException(e); - } - - org.apache.http.HttpResponse apache_response = - new BasicHttpResponse(new ProtocolVersion("HTTP", 1, 0), - _appengine_hresponse.getResponseCode(), - null); - - for (HTTPHeader h : _appengine_hresponse.getHeaders()) { - 
apache_response.addHeader(h.getName(), h.getValue()); - } - - return apache_response; - } - - public void sendRequestEntity(org.apache.http.HttpEntityEnclosingRequest request) { - ByteArrayOutputStream os = new ByteArrayOutputStream(); - - org.apache.http.HttpEntity ent = request.getEntity(); - if (ent != null) { - try { - ent.writeTo(os); - } catch (java.io.IOException e) { - throw new RuntimeException(e); - } - } - - _appengine_hrequest.setPayload(os.toByteArray()); - } - - public void sendRequestHeader(org.apache.http.HttpRequest apache_request) { - URL request_url; - - HttpHost host = _route.getTargetHost(); - - String protocol = host.getSchemeName(); - String addr = host.getHostName(); - int port = host.getPort(); - - String path = apache_request.getRequestLine().getUri(); - - try { - request_url = new URL(protocol, addr, port, path); - } catch (java.net.MalformedURLException e) { - throw new RuntimeException(e); - } - - HTTPMethod method = HTTPMethod.valueOf(apache_request.getRequestLine().getMethod()); - _appengine_hrequest = new HTTPRequest(request_url, method, allowTruncate() - .doNotFollowRedirects()); - - Header[] apache_headers = apache_request.getAllHeaders(); - for (int i = 0; i < apache_headers.length; i++) { - Header h = apache_headers[i]; - _appengine_hrequest - .setHeader(new HTTPHeader(h.getName(), h.getValue())); - } - } - - // HttpConnection methods - - public void close() { - return; - } - - public HttpConnectionMetrics getMetrics() { - return null; - } - - public int getSocketTimeout() { - return -1; - } - - public boolean isOpen() { - return true; - } - - public boolean isStale() { - return false; - } - - public void setSocketTimeout(int timeout) { - return; - } - - public void shutdown() { - return; - } - - // HttpInetConnection methods - - public InetAddress getLocalAddress() { - return null; - } - - public int getLocalPort() { - return -1; - } - - public InetAddress getRemoteAddress() { - return null; - } - - public int getRemotePort() { 
- return -1; - } -} \ No newline at end of file diff --git a/broker/appengine/src/com/google/gridworks/appengine/AppEngineClientConnectionManager.java b/broker/appengine/src/com/google/gridworks/appengine/AppEngineClientConnectionManager.java deleted file mode 100644 index 50da0fd8c..000000000 --- a/broker/appengine/src/com/google/gridworks/appengine/AppEngineClientConnectionManager.java +++ /dev/null @@ -1,76 +0,0 @@ -package com.google.gridworks.appengine; - -import java.net.InetAddress; -import java.net.Socket; -import java.util.concurrent.TimeUnit; - -import org.apache.http.conn.ClientConnectionManager; -import org.apache.http.conn.ClientConnectionRequest; -import org.apache.http.conn.ManagedClientConnection; -import org.apache.http.conn.routing.HttpRoute; -import org.apache.http.conn.scheme.Scheme; -import org.apache.http.conn.scheme.SchemeRegistry; -import org.apache.http.conn.scheme.SocketFactory; -import org.apache.http.params.HttpParams; - -public class AppEngineClientConnectionManager implements ClientConnectionManager { - - private SchemeRegistry schemes; - - class NoopSocketFactory implements SocketFactory { - public Socket connectSocket(Socket sock, String host, int port, InetAddress addr, int lport, HttpParams params) { - return null; - } - - public Socket createSocket() { - return null; - } - - public boolean isSecure(Socket sock) { - return false; - } - } - - public AppEngineClientConnectionManager() { - SocketFactory noop_sf = new NoopSocketFactory(); - schemes = new SchemeRegistry(); - schemes.register(new Scheme("http", noop_sf, 80)); - schemes.register(new Scheme("https", noop_sf, 443)); - } - - public void closeExpiredConnections() { - return; - } - - public void closeIdleConnections(long idletime, TimeUnit tunit) { - return; - } - - public ManagedClientConnection getConnection(HttpRoute route, Object state) { - return new AppEngineClientConnection(route, state); - } - - public SchemeRegistry getSchemeRegistry() { - return schemes; - } - - 
public void releaseConnection(ManagedClientConnection conn, long valid, TimeUnit tuint) { - return; - } - - public ClientConnectionRequest requestConnection(final HttpRoute route, final Object state) { - return new ClientConnectionRequest() { - public void abortRequest() { - return; - } - - public ManagedClientConnection getConnection(long idletime, TimeUnit tunit) { - return AppEngineClientConnectionManager.this.getConnection(route, state); - } - }; - } - - public void shutdown() { - return; - } -} diff --git a/broker/appengine/src/com/google/gridworks/broker/AppEngineGridworksBrokerImpl.java b/broker/appengine/src/com/google/gridworks/broker/AppEngineGridworksBrokerImpl.java deleted file mode 100644 index 2e4263975..000000000 --- a/broker/appengine/src/com/google/gridworks/broker/AppEngineGridworksBrokerImpl.java +++ /dev/null @@ -1,373 +0,0 @@ -package com.google.gridworks.broker; - -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; - -import javax.jdo.Extent; -import javax.jdo.JDOHelper; -import javax.jdo.PersistenceManager; -import javax.jdo.PersistenceManagerFactory; -import javax.jdo.Transaction; -import javax.jdo.annotations.IdGeneratorStrategy; -import javax.jdo.annotations.PersistenceCapable; -import javax.jdo.annotations.Persistent; -import javax.jdo.annotations.PrimaryKey; -import javax.servlet.ServletConfig; -import javax.servlet.http.HttpServletResponse; - -import org.apache.http.client.HttpClient; -import org.apache.http.conn.ClientConnectionManager; -import org.apache.http.impl.client.DefaultHttpClient; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.appengine.api.datastore.Text; -import com.google.gridworks.appengine.AppEngineClientConnectionManager; -import com.google.gridworks.broker.GridworksBroker; - -public class AppEngineGridworksBrokerImpl extends GridworksBroker { - - protected static final 
Logger logger = LoggerFactory.getLogger("gridworks.broker.appengine"); - - PersistenceManagerFactory pmfInstance; - - @Override - public void init(ServletConfig config) throws Exception { - super.init(config); - - pmfInstance = JDOHelper.getPersistenceManagerFactory("transactional"); - } - - @Override - public void destroy() throws Exception { - } - - // --------------------------------------------------------------------------------- - - protected HttpClient getHttpClient() { - ClientConnectionManager cm = new AppEngineClientConnectionManager(); - return new DefaultHttpClient(cm, null); - } - - // --------------------------------------------------------------------------------- - - protected void expire(HttpServletResponse response) throws Exception { - // TODO: implement - } - - protected void startProject(HttpServletResponse response, String pid, String uid, String lock, byte[] data, String metadata, List transformations) throws Exception { - // TODO: implement - } - - protected void expireLocks(HttpServletResponse response) throws Exception { - - PersistenceManager pm = pmfInstance.getPersistenceManager(); - - try { - Extent extent = pm.getExtent(Lock.class, false); - - try { - for (Lock lock : extent) { - if (lock.timestamp + LOCK_DURATION < System.currentTimeMillis()) { - Transaction tx = pm.currentTransaction(); - try { - tx.begin(); - pm.deletePersistent(lock); - tx.commit(); - } finally { - if (tx.isActive()) { - tx.rollback(); - } - } - } - } - } finally { - extent.closeAll(); - } - - respond(response, OK); - - } finally { - pm.close(); - } - } - - protected void getState(HttpServletResponse response, String pid, String uid, int rev) throws Exception { - PersistenceManager pm = pmfInstance.getPersistenceManager(); - - try { - // TODO: implement - respond(response, lockToJSON(getLock(pm,pid))); - } finally { - pm.close(); - } - } - - protected void obtainLock(HttpServletResponse response, String pid, String uid, int locktype, String lockvalue) throws 
Exception { - PersistenceManager pm = pmfInstance.getPersistenceManager(); - - // TODO: implement - - try { - Lock lock = getLock(pm, pid); - if (lock == null) { - Transaction tx = pm.currentTransaction(); - - try { - tx.begin(); - lock = new Lock(Long.toHexString(tx.hashCode()), pid, uid); - pm.makePersistent(lock); - tx.commit(); - } finally { - if (tx.isActive()) { - tx.rollback(); - } - } - } - - respond(response, lockToJSON(lock)); - - } finally { - pm.close(); - } - } - - protected void releaseLock(HttpServletResponse response, String pid, String uid, String lid) throws Exception { - - PersistenceManager pm = pmfInstance.getPersistenceManager(); - - try { - Lock lock = getLock(pm, pid); - if (lock != null) { - if (!lock.id.equals(lid)) { - throw new RuntimeException("Lock id doesn't match, can't release the lock"); - } - if (!lock.uid.equals(uid)) { - throw new RuntimeException("User id doesn't match the lock owner, can't release the lock"); - } - - Transaction tx = pm.currentTransaction(); - - try { - tx.begin(); - pm.deletePersistent(lock); - tx.commit(); - } finally { - if (tx.isActive()) { - tx.rollback(); - } - } - } - - respond(response, OK); - - } finally { - pm.close(); - } - } - - // ---------------------------------------------------------------------------------------------------- - - protected void startProject(HttpServletResponse response, String pid, String uid, String lid, String data) throws Exception { - PersistenceManager pm = pmfInstance.getPersistenceManager(); - - try { - checkLock(pm, pid, uid, lid); - - Project project = getProject(pm, pid); - - if (project != null) { - throw new RuntimeException("Project '" + pid + "' already exists"); - } - - Transaction tx = pm.currentTransaction(); - - try { - tx.begin(); - project = new Project(pid, data); - pm.makePersistent(project); - tx.commit(); - } finally { - if (tx.isActive()) { - tx.rollback(); - } - } - - respond(response, OK); - } finally { - pm.close(); - } - } - - protected void 
addTransformations(HttpServletResponse response, String pid, String uid, String lid, List transformations) throws Exception { - PersistenceManager pm = pmfInstance.getPersistenceManager(); - - try { - checkLock(pm, pid, uid, lid); - - Project project = getProject(pm, pid); - - if (project == null) { - throw new RuntimeException("Project '" + pid + "' not found"); - } - - Transaction tx = pm.currentTransaction(); - - try { - for (String s : transformations) { - project.transformations.add(new Text(s)); - } - tx.commit(); - } finally { - if (tx.isActive()) { - tx.rollback(); - } - } - - respond(response, OK); - } finally { - pm.close(); - } - } - - // --------------------------------------------------------------------------------- - - protected void openProject(HttpServletResponse response, String pid) throws Exception { - PersistenceManager pm = pmfInstance.getPersistenceManager(); - - try { - Project project = getProject(pm, pid); - - Writer w = response.getWriter(); - JSONWriter writer = new JSONWriter(w); - writer.object(); - writer.key("data"); writer.value(project.data.toString()); - writer.key("transformations"); - writer.array(); - for (Text s : project.transformations) { - writer.value(s.toString()); - } - writer.endArray(); - writer.endObject(); - w.flush(); - w.close(); - } finally { - pm.close(); - } - } - - protected void getHistory(HttpServletResponse response, String pid, int tindex) throws Exception { - PersistenceManager pm = pmfInstance.getPersistenceManager(); - - try { - Project project = getProject(pm, pid); - - Writer w = response.getWriter(); - JSONWriter writer = new JSONWriter(w); - writer.object(); - writer.key("transformations"); - writer.array(); - int size = project.transformations.size(); - for (int i = tindex; i < size; i++) { - writer.value(project.transformations.get(i).toString()); - } - writer.endArray(); - writer.endObject(); - w.flush(); - w.close(); - } finally { - pm.close(); - } - } - - // 
--------------------------------------------------------------------------------- - - Project getProject(PersistenceManager pm, String pid) { - Project project = pm.getObjectById(Project.class, pid); - if (project == null) { - throw new RuntimeException("Project '" + pid + "' is not managed by this broker"); - } - return project; - } - - @PersistenceCapable - static class Project { - - @PrimaryKey - @Persistent(valueStrategy = IdGeneratorStrategy.IDENTITY) - String pid; - - @Persistent - List transformations = new ArrayList(); - - @Persistent - Text data; - - Project(String pid, String data) { - this.pid = pid; - this.data = new Text(data); - } - } - - // --------------------------------------------------------------------------------- - - Lock getLock(PersistenceManager pm, String pid) { - return pm.getObjectById(Lock.class, pid); - } - - void checkLock(PersistenceManager pm, String pid, String uid, String lid) { - Lock lock = getLock(pm, pid); - - if (lock == null) { - throw new RuntimeException("No lock was found with the given Lock id '" + lid + "', you have to have a valid lock on a project in order to start it"); - } - - if (!lock.pid.equals(pid)) { - throw new RuntimeException("Lock '" + lid + "' is for another project: " + pid); - } - - if (!lock.uid.equals(uid)) { - throw new RuntimeException("Lock '" + lid + "' is owned by another user: " + uid); - } - } - - JSONObject lockToJSON(Lock lock) throws JSONException { - JSONObject o = new JSONObject(); - if (lock != null) { - o.put("lock_id", lock.id); - o.put("project_id", lock.pid); - o.put("user_id", lock.uid); - o.put("timestamp", lock.timestamp); - } - return o; - } - - @PersistenceCapable - static class Lock { - - @Persistent - String id; - - @PrimaryKey - @Persistent(valueStrategy = IdGeneratorStrategy.IDENTITY) - String pid; - - @Persistent - String uid; - - @Persistent - long timestamp; - - Lock(String id, String pid, String uid) { - this.id = id; - this.pid = pid; - this.uid = uid; - this.timestamp = 
System.currentTimeMillis(); - } - } - -} diff --git a/broker/appengine/src/com/google/refine/appengine/AppEngineClientConnection.java b/broker/appengine/src/com/google/refine/appengine/AppEngineClientConnection.java new file mode 100644 index 000000000..9f15466ed --- /dev/null +++ b/broker/appengine/src/com/google/refine/appengine/AppEngineClientConnection.java @@ -0,0 +1,243 @@ +package com.google.refine.appengine; + +import static com.google.appengine.api.urlfetch.FetchOptions.Builder.allowTruncate; + +import java.io.ByteArrayOutputStream; +import java.net.InetAddress; +import java.net.URL; +import java.util.concurrent.TimeUnit; + +import javax.net.ssl.SSLSession; + +import org.apache.http.Header; +import org.apache.http.HttpConnectionMetrics; +import org.apache.http.HttpHost; +import org.apache.http.HttpResponse; +import org.apache.http.ProtocolVersion; +import org.apache.http.conn.ManagedClientConnection; +import org.apache.http.conn.routing.HttpRoute; +import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.message.BasicHttpResponse; +import org.apache.http.params.HttpParams; +import org.apache.http.protocol.HttpContext; + +import com.google.appengine.api.urlfetch.HTTPHeader; +import com.google.appengine.api.urlfetch.HTTPMethod; +import com.google.appengine.api.urlfetch.HTTPRequest; +import com.google.appengine.api.urlfetch.HTTPResponse; +import com.google.appengine.api.urlfetch.URLFetchService; +import com.google.appengine.api.urlfetch.URLFetchServiceFactory; + +class AppEngineClientConnection implements ManagedClientConnection { + // Managed is the composition of ConnectionReleaseTrigger, + // HttpClientConnection, HttpConnection, HttpInetConnection + + private HttpRoute _route; + private Object _state; + private boolean _reuseable; + + public AppEngineClientConnection(HttpRoute route, Object state) { + _route = route; + _state = state; + } + + // ManagedClientConnection methods + + public HttpRoute getRoute() { + return _route; + } + + public 
Object getState() { + return _state; + } + + public SSLSession getSSLSession() { + return null; + } + + public boolean isSecure() { + // XXX maybe parse the url to see if it's https? + return false; + } + + public boolean isMarkedReusable() { + return _reuseable; + } + + public void markReusable() { + _reuseable = true; + } + + public void layerProtocol(HttpContext context, HttpParams params) { + return; + } + + public void open(HttpRoute route, HttpContext context, HttpParams params) { + return; + } + + public void setIdleDuration(long duration, TimeUnit unit) { + return; + } + + public void setState(Object state) { + _state = state; + } + + public void tunnelProxy(HttpHost next, boolean secure, HttpParams params) { + return; + } + + public void tunnelTarget(boolean secure, HttpParams params) { + return; + } + + public void unmarkReusable() { + _reuseable = false; + } + + + // ConnectionReleaseTrigger methods + + public void releaseConnection() { + return; + } + + public void abortConnection() { + return; + } + + // HttpClientConnection methods + + private HTTPRequest _appengine_hrequest; + private HTTPResponse _appengine_hresponse; + + public void flush() { + return; + } + + public boolean isResponseAvailable(int timeout) { + // XXX possibly use Async fetcher + return true; + } + + public void receiveResponseEntity(org.apache.http.HttpResponse apache_response) { + byte[] data = _appengine_hresponse.getContent(); + + if (data != null) { + apache_response.setEntity(new ByteArrayEntity(data)); + } + } + + public HttpResponse receiveResponseHeader() { + URLFetchService ufs = URLFetchServiceFactory.getURLFetchService(); + try { + _appengine_hresponse = ufs.fetch(_appengine_hrequest); + } catch (java.io.IOException e) { + throw new RuntimeException(e); + } + + org.apache.http.HttpResponse apache_response = + new BasicHttpResponse(new ProtocolVersion("HTTP", 1, 0), + _appengine_hresponse.getResponseCode(), + null); + + for (HTTPHeader h : 
_appengine_hresponse.getHeaders()) { + apache_response.addHeader(h.getName(), h.getValue()); + } + + return apache_response; + } + + public void sendRequestEntity(org.apache.http.HttpEntityEnclosingRequest request) { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + + org.apache.http.HttpEntity ent = request.getEntity(); + if (ent != null) { + try { + ent.writeTo(os); + } catch (java.io.IOException e) { + throw new RuntimeException(e); + } + } + + _appengine_hrequest.setPayload(os.toByteArray()); + } + + public void sendRequestHeader(org.apache.http.HttpRequest apache_request) { + URL request_url; + + HttpHost host = _route.getTargetHost(); + + String protocol = host.getSchemeName(); + String addr = host.getHostName(); + int port = host.getPort(); + + String path = apache_request.getRequestLine().getUri(); + + try { + request_url = new URL(protocol, addr, port, path); + } catch (java.net.MalformedURLException e) { + throw new RuntimeException(e); + } + + HTTPMethod method = HTTPMethod.valueOf(apache_request.getRequestLine().getMethod()); + _appengine_hrequest = new HTTPRequest(request_url, method, allowTruncate() + .doNotFollowRedirects()); + + Header[] apache_headers = apache_request.getAllHeaders(); + for (int i = 0; i < apache_headers.length; i++) { + Header h = apache_headers[i]; + _appengine_hrequest + .setHeader(new HTTPHeader(h.getName(), h.getValue())); + } + } + + // HttpConnection methods + + public void close() { + return; + } + + public HttpConnectionMetrics getMetrics() { + return null; + } + + public int getSocketTimeout() { + return -1; + } + + public boolean isOpen() { + return true; + } + + public boolean isStale() { + return false; + } + + public void setSocketTimeout(int timeout) { + return; + } + + public void shutdown() { + return; + } + + // HttpInetConnection methods + + public InetAddress getLocalAddress() { + return null; + } + + public int getLocalPort() { + return -1; + } + + public InetAddress getRemoteAddress() { + return 
null; + } + + public int getRemotePort() { + return -1; + } +} \ No newline at end of file diff --git a/broker/appengine/src/com/google/refine/appengine/AppEngineClientConnectionManager.java b/broker/appengine/src/com/google/refine/appengine/AppEngineClientConnectionManager.java new file mode 100644 index 000000000..72dc29029 --- /dev/null +++ b/broker/appengine/src/com/google/refine/appengine/AppEngineClientConnectionManager.java @@ -0,0 +1,76 @@ +package com.google.refine.appengine; + +import java.net.InetAddress; +import java.net.Socket; +import java.util.concurrent.TimeUnit; + +import org.apache.http.conn.ClientConnectionManager; +import org.apache.http.conn.ClientConnectionRequest; +import org.apache.http.conn.ManagedClientConnection; +import org.apache.http.conn.routing.HttpRoute; +import org.apache.http.conn.scheme.Scheme; +import org.apache.http.conn.scheme.SchemeRegistry; +import org.apache.http.conn.scheme.SocketFactory; +import org.apache.http.params.HttpParams; + +public class AppEngineClientConnectionManager implements ClientConnectionManager { + + private SchemeRegistry schemes; + + class NoopSocketFactory implements SocketFactory { + public Socket connectSocket(Socket sock, String host, int port, InetAddress addr, int lport, HttpParams params) { + return null; + } + + public Socket createSocket() { + return null; + } + + public boolean isSecure(Socket sock) { + return false; + } + } + + public AppEngineClientConnectionManager() { + SocketFactory noop_sf = new NoopSocketFactory(); + schemes = new SchemeRegistry(); + schemes.register(new Scheme("http", noop_sf, 80)); + schemes.register(new Scheme("https", noop_sf, 443)); + } + + public void closeExpiredConnections() { + return; + } + + public void closeIdleConnections(long idletime, TimeUnit tunit) { + return; + } + + public ManagedClientConnection getConnection(HttpRoute route, Object state) { + return new AppEngineClientConnection(route, state); + } + + public SchemeRegistry getSchemeRegistry() { + 
return schemes; + } + + public void releaseConnection(ManagedClientConnection conn, long valid, TimeUnit tuint) { + return; + } + + public ClientConnectionRequest requestConnection(final HttpRoute route, final Object state) { + return new ClientConnectionRequest() { + public void abortRequest() { + return; + } + + public ManagedClientConnection getConnection(long idletime, TimeUnit tunit) { + return AppEngineClientConnectionManager.this.getConnection(route, state); + } + }; + } + + public void shutdown() { + return; + } +} diff --git a/broker/appengine/src/com/google/refine/broker/AppEngineGridworksBrokerImpl.java b/broker/appengine/src/com/google/refine/broker/AppEngineGridworksBrokerImpl.java new file mode 100644 index 000000000..f583e9d31 --- /dev/null +++ b/broker/appengine/src/com/google/refine/broker/AppEngineGridworksBrokerImpl.java @@ -0,0 +1,373 @@ +package com.google.refine.broker; + +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; + +import javax.jdo.Extent; +import javax.jdo.JDOHelper; +import javax.jdo.PersistenceManager; +import javax.jdo.PersistenceManagerFactory; +import javax.jdo.Transaction; +import javax.jdo.annotations.IdGeneratorStrategy; +import javax.jdo.annotations.PersistenceCapable; +import javax.jdo.annotations.Persistent; +import javax.jdo.annotations.PrimaryKey; +import javax.servlet.ServletConfig; +import javax.servlet.http.HttpServletResponse; + +import org.apache.http.client.HttpClient; +import org.apache.http.conn.ClientConnectionManager; +import org.apache.http.impl.client.DefaultHttpClient; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.appengine.api.datastore.Text; +import com.google.refine.appengine.AppEngineClientConnectionManager; +import com.google.refine.broker.GridworksBroker; + +public class AppEngineGridworksBrokerImpl extends GridworksBroker { + + protected static final 
Logger logger = LoggerFactory.getLogger("gridworks.broker.appengine"); + + PersistenceManagerFactory pmfInstance; + + @Override + public void init(ServletConfig config) throws Exception { + super.init(config); + + pmfInstance = JDOHelper.getPersistenceManagerFactory("transactional"); + } + + @Override + public void destroy() throws Exception { + } + + // --------------------------------------------------------------------------------- + + protected HttpClient getHttpClient() { + ClientConnectionManager cm = new AppEngineClientConnectionManager(); + return new DefaultHttpClient(cm, null); + } + + // --------------------------------------------------------------------------------- + + protected void expire(HttpServletResponse response) throws Exception { + // TODO: implement + } + + protected void startProject(HttpServletResponse response, String pid, String uid, String lock, byte[] data, String metadata, List transformations) throws Exception { + // TODO: implement + } + + protected void expireLocks(HttpServletResponse response) throws Exception { + + PersistenceManager pm = pmfInstance.getPersistenceManager(); + + try { + Extent extent = pm.getExtent(Lock.class, false); + + try { + for (Lock lock : extent) { + if (lock.timestamp + LOCK_DURATION < System.currentTimeMillis()) { + Transaction tx = pm.currentTransaction(); + try { + tx.begin(); + pm.deletePersistent(lock); + tx.commit(); + } finally { + if (tx.isActive()) { + tx.rollback(); + } + } + } + } + } finally { + extent.closeAll(); + } + + respond(response, OK); + + } finally { + pm.close(); + } + } + + protected void getState(HttpServletResponse response, String pid, String uid, int rev) throws Exception { + PersistenceManager pm = pmfInstance.getPersistenceManager(); + + try { + // TODO: implement + respond(response, lockToJSON(getLock(pm,pid))); + } finally { + pm.close(); + } + } + + protected void obtainLock(HttpServletResponse response, String pid, String uid, int locktype, String lockvalue) throws 
Exception { + PersistenceManager pm = pmfInstance.getPersistenceManager(); + + // TODO: implement + + try { + Lock lock = getLock(pm, pid); + if (lock == null) { + Transaction tx = pm.currentTransaction(); + + try { + tx.begin(); + lock = new Lock(Long.toHexString(tx.hashCode()), pid, uid); + pm.makePersistent(lock); + tx.commit(); + } finally { + if (tx.isActive()) { + tx.rollback(); + } + } + } + + respond(response, lockToJSON(lock)); + + } finally { + pm.close(); + } + } + + protected void releaseLock(HttpServletResponse response, String pid, String uid, String lid) throws Exception { + + PersistenceManager pm = pmfInstance.getPersistenceManager(); + + try { + Lock lock = getLock(pm, pid); + if (lock != null) { + if (!lock.id.equals(lid)) { + throw new RuntimeException("Lock id doesn't match, can't release the lock"); + } + if (!lock.uid.equals(uid)) { + throw new RuntimeException("User id doesn't match the lock owner, can't release the lock"); + } + + Transaction tx = pm.currentTransaction(); + + try { + tx.begin(); + pm.deletePersistent(lock); + tx.commit(); + } finally { + if (tx.isActive()) { + tx.rollback(); + } + } + } + + respond(response, OK); + + } finally { + pm.close(); + } + } + + // ---------------------------------------------------------------------------------------------------- + + protected void startProject(HttpServletResponse response, String pid, String uid, String lid, String data) throws Exception { + PersistenceManager pm = pmfInstance.getPersistenceManager(); + + try { + checkLock(pm, pid, uid, lid); + + Project project = getProject(pm, pid); + + if (project != null) { + throw new RuntimeException("Project '" + pid + "' already exists"); + } + + Transaction tx = pm.currentTransaction(); + + try { + tx.begin(); + project = new Project(pid, data); + pm.makePersistent(project); + tx.commit(); + } finally { + if (tx.isActive()) { + tx.rollback(); + } + } + + respond(response, OK); + } finally { + pm.close(); + } + } + + protected void 
addTransformations(HttpServletResponse response, String pid, String uid, String lid, List transformations) throws Exception { + PersistenceManager pm = pmfInstance.getPersistenceManager(); + + try { + checkLock(pm, pid, uid, lid); + + Project project = getProject(pm, pid); + + if (project == null) { + throw new RuntimeException("Project '" + pid + "' not found"); + } + + Transaction tx = pm.currentTransaction(); + + try { + for (String s : transformations) { + project.transformations.add(new Text(s)); + } + tx.commit(); + } finally { + if (tx.isActive()) { + tx.rollback(); + } + } + + respond(response, OK); + } finally { + pm.close(); + } + } + + // --------------------------------------------------------------------------------- + + protected void openProject(HttpServletResponse response, String pid) throws Exception { + PersistenceManager pm = pmfInstance.getPersistenceManager(); + + try { + Project project = getProject(pm, pid); + + Writer w = response.getWriter(); + JSONWriter writer = new JSONWriter(w); + writer.object(); + writer.key("data"); writer.value(project.data.toString()); + writer.key("transformations"); + writer.array(); + for (Text s : project.transformations) { + writer.value(s.toString()); + } + writer.endArray(); + writer.endObject(); + w.flush(); + w.close(); + } finally { + pm.close(); + } + } + + protected void getHistory(HttpServletResponse response, String pid, int tindex) throws Exception { + PersistenceManager pm = pmfInstance.getPersistenceManager(); + + try { + Project project = getProject(pm, pid); + + Writer w = response.getWriter(); + JSONWriter writer = new JSONWriter(w); + writer.object(); + writer.key("transformations"); + writer.array(); + int size = project.transformations.size(); + for (int i = tindex; i < size; i++) { + writer.value(project.transformations.get(i).toString()); + } + writer.endArray(); + writer.endObject(); + w.flush(); + w.close(); + } finally { + pm.close(); + } + } + + // 
--------------------------------------------------------------------------------- + + Project getProject(PersistenceManager pm, String pid) { + Project project = pm.getObjectById(Project.class, pid); + if (project == null) { + throw new RuntimeException("Project '" + pid + "' is not managed by this broker"); + } + return project; + } + + @PersistenceCapable + static class Project { + + @PrimaryKey + @Persistent(valueStrategy = IdGeneratorStrategy.IDENTITY) + String pid; + + @Persistent + List transformations = new ArrayList(); + + @Persistent + Text data; + + Project(String pid, String data) { + this.pid = pid; + this.data = new Text(data); + } + } + + // --------------------------------------------------------------------------------- + + Lock getLock(PersistenceManager pm, String pid) { + return pm.getObjectById(Lock.class, pid); + } + + void checkLock(PersistenceManager pm, String pid, String uid, String lid) { + Lock lock = getLock(pm, pid); + + if (lock == null) { + throw new RuntimeException("No lock was found with the given Lock id '" + lid + "', you have to have a valid lock on a project in order to start it"); + } + + if (!lock.pid.equals(pid)) { + throw new RuntimeException("Lock '" + lid + "' is for another project: " + pid); + } + + if (!lock.uid.equals(uid)) { + throw new RuntimeException("Lock '" + lid + "' is owned by another user: " + uid); + } + } + + JSONObject lockToJSON(Lock lock) throws JSONException { + JSONObject o = new JSONObject(); + if (lock != null) { + o.put("lock_id", lock.id); + o.put("project_id", lock.pid); + o.put("user_id", lock.uid); + o.put("timestamp", lock.timestamp); + } + return o; + } + + @PersistenceCapable + static class Lock { + + @Persistent + String id; + + @PrimaryKey + @Persistent(valueStrategy = IdGeneratorStrategy.IDENTITY) + String pid; + + @Persistent + String uid; + + @Persistent + long timestamp; + + Lock(String id, String pid, String uid) { + this.id = id; + this.pid = pid; + this.uid = uid; + this.timestamp = 
System.currentTimeMillis(); + } + } + +} diff --git a/broker/core/src/com/google/gridworks/broker/GridworksBroker.java b/broker/core/src/com/google/gridworks/broker/GridworksBroker.java deleted file mode 100644 index f70f3a2f2..000000000 --- a/broker/core/src/com/google/gridworks/broker/GridworksBroker.java +++ /dev/null @@ -1,311 +0,0 @@ - -package com.google.gridworks.broker; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import javax.servlet.ServletConfig; -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.apache.http.NameValuePair; -import org.apache.http.client.HttpClient; -import org.apache.http.client.ResponseHandler; -import org.apache.http.client.entity.UrlEncodedFormEntity; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.impl.client.BasicResponseHandler; -import org.apache.http.message.BasicNameValuePair; -import org.apache.http.params.CoreProtocolPNames; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import edu.mit.simile.butterfly.ButterflyModuleImpl; - -/** - * This class contains all the code shared by various implementations of a Gridworks Broker. - * - * A broker is a server used by multiple Gridworks installations to enable collaborative - * development over the same project. - * - * Broker implementations differ in how they store their state but all of them are required - * to extend this abstract class and implement the services that are called via HTTP. 
- * - */ -public abstract class GridworksBroker extends ButterflyModuleImpl { - - static final public String GET_STATE = "get_state"; - static final public String EXPIRE = "expire"; - static final public String OBTAIN_LOCK = "obtain_lock"; - static final public String RELEASE_LOCK = "release_lock"; - static final public String TRANSFORM = "transform"; - static final public String START = "start"; - static final public String OPEN = "open"; - - static final public int ALL = 0; - static final public int COL = 1; - static final public int CELL = 2; - - static final protected Logger logger = LoggerFactory.getLogger("gridworks.broker"); - - static final protected String USER_INFO_URL = "http://www.freebase.com/api/service/user_info"; - static final protected String DELEGATED_OAUTH_HEADER = "X-Freebase-Credentials"; - static final protected String OAUTH_HEADER = "Authorization"; - - static protected String OK; - - static { - try { - JSONObject o = new JSONObject(); - o.put("status","ok"); - OK = o.toString(); - } catch (JSONException e) { - // not going to happen; - } - } - - static public final long LOCK_DURATION = 60 * 1000; // 1 minute - static public final long USER_DURATION = 5 * 60 * 1000; // 1 minute - static public final long LOCK_EXPIRATION_CHECK_DELAY = 5 * 1000; // 5 seconds - - protected HttpClient httpclient; - - protected boolean developmentMode; - - @Override - public void init(ServletConfig config) throws Exception { - super.init(config); - httpclient = getHttpClient(); - developmentMode = Boolean.parseBoolean(config.getInitParameter("gridworks.development")); - if (developmentMode) logger.warn("Running in development mode"); - } - - @Override - public void destroy() throws Exception { - httpclient.getConnectionManager().shutdown(); - } - - @Override - public boolean process(String path, HttpServletRequest request, HttpServletResponse response) throws Exception { - if (logger.isDebugEnabled()) { - logger.debug("> process '{}'", path); - } else { - 
logger.info("process '{}'", path); - } - - try { - - if (GET_STATE.equals(path)) { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - getState(response, getParameter(request, "pid"), getUserId(request), getInteger(request, "rev")); - } else if (EXPIRE.equals(path)) { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - expire(response); - } else if (OBTAIN_LOCK.equals(path)) { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - obtainLock(response, getParameter(request, "pid"), getUserId(request), getInteger(request, "locktype"), getParameter(request, "lockvalue")); - } else if (RELEASE_LOCK.equals(path)) { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - releaseLock(response, getParameter(request, "pid"), getUserId(request), getParameter(request, "lock")); - } else if (TRANSFORM.equals(path)) { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - addTransformations(response, getParameter(request, "pid"), getUserId(request), getParameter(request, "lock"), getList(request, "transformations")); - } else if (START.equals(path)) { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - startProject(response, getParameter(request, "pid"), getUserId(request), getParameter(request, "lock"), getData(request), getParameter(request, "metadata"), getList(request, "transformations")); - } else if (OPEN.equals(path)) { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - openProject(response, getParameter(request, "pid")); - } else { - boolean value = super.process(path, request, response); - if (logger.isDebugEnabled()) logger.debug("< process '{}'", path); - return value; - } - - } catch (RuntimeException e) { - logger.error("runtime 
error", e.getMessage()); - respondError(response, e.getMessage()); - } catch (Exception e) { - logger.error("internal error", e); - respondException(response, e); - } - - if (logger.isDebugEnabled()) logger.debug("< process '{}'", path); - - return true; - } - - // ---------------------------------------------------------------------------------------- - - protected abstract HttpClient getHttpClient(); - - protected abstract void expire(HttpServletResponse response) throws Exception; - - protected abstract void getState(HttpServletResponse response, String pid, String uid, int rev) throws Exception; - - protected abstract void obtainLock(HttpServletResponse response, String pid, String uid, int locktype, String lockvalue) throws Exception; - - protected abstract void releaseLock(HttpServletResponse response, String pid, String uid, String lock) throws Exception; - - protected abstract void startProject(HttpServletResponse response, String pid, String uid, String lock, byte[] data, String metadata, List transformations) throws Exception; - - protected abstract void addTransformations(HttpServletResponse response, String pid, String uid, String lock, List transformations) throws Exception; - - protected abstract void openProject(HttpServletResponse response, String pid) throws Exception; - - // ---------------------------------------------------------------------------------------- - - @SuppressWarnings("unchecked") - protected String getUserId(HttpServletRequest request) throws Exception { - - // This is useful for testing - if (developmentMode) { - return getParameter(request, "uid"); - } - - String oauth = request.getHeader(DELEGATED_OAUTH_HEADER); - if (oauth == null) { - throw new RuntimeException("The request needs to contain the '" + DELEGATED_OAUTH_HEADER + "' header set to obtain user identity via Freebase."); - } - - List formparams = new ArrayList(); - Map params = (Map) request.getParameterMap(); - for (Entry e : params.entrySet()) { - formparams.add(new 
BasicNameValuePair((String) e.getKey(), (String) e.getValue())); - } - UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8"); - - HttpPost httpRequest = new HttpPost(USER_INFO_URL); - httpRequest.setHeader(OAUTH_HEADER, oauth); - httpRequest.getParams().setParameter(CoreProtocolPNames.USER_AGENT, "Gridworks Broker"); - httpRequest.setEntity(entity); - - ResponseHandler responseHandler = new BasicResponseHandler(); - String responseBody = httpclient.execute(httpRequest, responseHandler); - JSONObject o = new JSONObject(responseBody); - - return o.getString("username"); - } - - // ---------------------------------------------------------------------------------------- - - static protected String getParameter(HttpServletRequest request, String name) throws ServletException { - String param = request.getParameter(name); - if (param == null) { - throw new RuntimeException("request must come with a '" + name + "' parameter"); - } - return param; - } - - static protected List getList(HttpServletRequest request, String name) throws ServletException, JSONException { - String param = getParameter(request, name); - JSONArray a = new JSONArray(param); - List result = new ArrayList(a.length()); - for (int i = 0; i < a.length(); i++) { - result.add(a.getString(i)); - } - return result; - } - - static protected int getInteger(HttpServletRequest request, String name) throws ServletException, JSONException { - return Integer.parseInt(getParameter(request, name)); - } - - static protected byte[] getData(HttpServletRequest request) throws ServletException, IOException { - ByteArrayOutputStream output = new ByteArrayOutputStream(); - InputStream input = request.getInputStream(); - byte[] buffer = new byte[4096]; - int count = 0; - int n = 0; - while (-1 != (n = input.read(buffer))) { - output.write(buffer, 0, n); - count += n; - } - return output.toByteArray(); - - } - - static protected void respondError(HttpServletResponse response, String error) throws 
IOException, ServletException { - - if (response == null) { - throw new ServletException("Response object can't be null"); - } - - try { - JSONObject o = new JSONObject(); - o.put("status", "error"); - o.put("message", error); - response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); - respond(response, o.toString()); - } catch (JSONException e) { - e.printStackTrace(response.getWriter()); - } - } - - static protected void respondException(HttpServletResponse response, Exception e) throws IOException, ServletException { - - if (response == null) { - throw new ServletException("Response object can't be null"); - } - - try { - JSONObject o = new JSONObject(); - o.put("status", "error"); - o.put("message", e.getMessage()); - - StringWriter sw = new StringWriter(); - PrintWriter pw = new PrintWriter(sw); - e.printStackTrace(pw); - pw.flush(); - sw.flush(); - - o.put("stack", sw.toString()); - - response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); - respond(response, o.toString()); - } catch (JSONException e1) { - e.printStackTrace(response.getWriter()); - } - } - - static protected void respond(HttpServletResponse response, JSONObject content) throws IOException, ServletException { - if (content == null) { - throw new ServletException("Content object can't be null"); - } - - respond(response, content.toString()); - } - - static protected void respond(HttpServletResponse response, String content) throws IOException, ServletException { - if (response == null) { - throw new ServletException("Response object can't be null"); - } - - Writer w = response.getWriter(); - if (w != null) { - w.write(content); - w.flush(); - w.close(); - } else { - throw new ServletException("response returned a null writer"); - } - } -} diff --git a/broker/core/src/com/google/gridworks/broker/GridworksBrokerImpl.java b/broker/core/src/com/google/gridworks/broker/GridworksBrokerImpl.java deleted file mode 100644 index a952ccb4e..000000000 --- 
a/broker/core/src/com/google/gridworks/broker/GridworksBrokerImpl.java +++ /dev/null @@ -1,592 +0,0 @@ -package com.google.gridworks.broker; - -import static com.sleepycat.persist.model.Relationship.MANY_TO_ONE; - -import java.io.File; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Timer; -import java.util.TimerTask; - -import javax.servlet.ServletConfig; -import javax.servlet.http.HttpServletResponse; - -import org.apache.http.client.HttpClient; -import org.apache.http.impl.client.DefaultHttpClient; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.sleepycat.je.Environment; -import com.sleepycat.je.EnvironmentConfig; -import com.sleepycat.je.Transaction; -import com.sleepycat.persist.EntityCursor; -import com.sleepycat.persist.EntityStore; -import com.sleepycat.persist.PrimaryIndex; -import com.sleepycat.persist.SecondaryIndex; -import com.sleepycat.persist.StoreConfig; -import com.sleepycat.persist.model.Entity; -import com.sleepycat.persist.model.PrimaryKey; -import com.sleepycat.persist.model.SecondaryKey; - -public class GridworksBrokerImpl extends GridworksBroker { - - protected static final Logger logger = LoggerFactory.getLogger("gridworks.broker.local"); - - Environment env; - - EntityStore projectStore; - EntityStore lockStore; - EntityStore userStore; - - PrimaryIndex projectById; - PrimaryIndex lockById; - - SecondaryIndex locksByProject; - - Timer timer; - Expirer expirer; - - @Override - public void init(ServletConfig config) throws Exception { - logger.trace("> init"); - super.init(config); - - timer = new Timer(); - expirer = new Expirer(); - timer.schedule(expirer, 0, LOCK_EXPIRATION_CHECK_DELAY); - - String dataDir = config.getInitParameter("gridworks.data"); - if (dataDir == null) dataDir = "data"; - File dataPath = new File(dataDir); - if (!dataPath.exists()) dataPath.mkdirs(); - - 
EnvironmentConfig envConfig = new EnvironmentConfig(); - envConfig.setAllowCreate(true); - envConfig.setTransactional(true); - env = new Environment(dataPath, envConfig); - - StoreConfig storeConfig = new StoreConfig(); - storeConfig.setAllowCreate(true); - storeConfig.setTransactional(true); - projectStore = new EntityStore(env, "ProjectsStore", storeConfig); - lockStore = new EntityStore(env, "LockStore", storeConfig); - - projectById = projectStore.getPrimaryIndex(String.class, Project.class); - lockById = lockStore.getPrimaryIndex(String.class, Lock.class); - - locksByProject = lockStore.getSecondaryIndex(lockById, String.class, "pid"); - logger.trace("< init"); - } - - @Override - public void destroy() throws Exception { - logger.trace("> destroy"); - super.destroy(); - - if (projectStore != null) { - projectStore.close(); - projectById = null; - } - - if (lockStore != null) { - lockStore.close(); - lockById = null; - } - - if (timer != null) { - timer.cancel(); - timer.purge(); - timer = null; - } - - if (env != null) { - env.close(); - env = null; - } - logger.trace("< destroy"); - } - - class Expirer extends TimerTask { - public void run() { - if (lockById != null) { - logger.trace("> expire"); - Transaction txn = env.beginTransaction(null, null); - try { - EntityCursor cursor = lockById.entities(); - try { - for (Lock lock : cursor) { - if (lock.timestamp + LOCK_DURATION < System.currentTimeMillis()) { - logger.trace("Found expired lock {}", lock.id); - try { - releaseLock(null, lock.pid, lock.uid, lock.id); - } catch (Exception e) { - logger.error("Exception while expiring lock for project '" + lock.pid + "'", e); - } - } - } - } finally { - cursor.close(); - } - } finally { - if (txn != null) { - txn.abort(); - txn = null; - } - } - logger.trace("< expire"); - } - } - } - - // --------------------------------------------------------------------------------- - - @Override - protected HttpClient getHttpClient() { - return new DefaultHttpClient(); - } - - 
// --------------------------------------------------------------------------------- - - @Override - protected void expire(HttpServletResponse response) throws Exception { - expirer.run(); - respond(response, OK); - } - - @Override - protected void obtainLock(HttpServletResponse response, String pid, String uid, int locktype, String lockvalue) throws Exception { - logger.trace("> obtain lock"); - Lock lock = null; - Lock blocker = null; - - Transaction txn = env.beginTransaction(null, null); - - try { - - EntityCursor cursor = locksByProject.subIndex(pid).entities(); - - /* - * ALL - * blocked -> somebody else's lock - * reuse -> you already have an ALL lock - * new -> else - * - * COL - * blocked -> somebody else's all lock || a lock on the same col - * reuse -> you have an ALL lock || a lock on the same col - * new -> else - * - * CELL - * blocked -> somebody else's all lock || a lock on the same col || a lock on the same cell - * reuse -> you have a lock on the same cell - * yes -> (you have a lock on the same cell) && (nobody else has a lock on the same cell || the same col || all) - * new -> else - * - */ - - try { - if (locktype == ALL) { - if (lockvalue.length() > 0) { - throw new RuntimeException("Hmm, seems like you're calling an ALL with a specific value, are you sure you didn't want another type of lock?"); - } - - for (Lock l : cursor) { - if (!l.uid.equals(uid)) { - blocker = l; - break; - } else { - if (l.type == ALL) { - lock = l; - break; - } - } - } - } else if (locktype == COL) { - if (lockvalue.indexOf(',') > -1) { - throw new RuntimeException("Hmm, seems like you're calling a COL lock with a CELL value"); - } - - for (Lock l : cursor) { - if (!l.uid.equals(uid)) { - if (l.type == ALL || - (l.type == COL && l.value.equals(lockvalue)) || - (l.type == CELL && l.value.split(",")[0].equals(lockvalue))) { - blocker = l; - break; - } - } else { - if (l.type == ALL || - (l.type == COL && l.value.equals(lockvalue))) { - lock = l; - break; - } - } - } - } 
else if (locktype == CELL) { - if (lockvalue.indexOf(',') == -1) { - throw new RuntimeException("Hmm, seems like you're calling a CELL lock without specifying row and column: format must be 'row,column'"); - } - - for (Lock l : cursor) { - if (!l.uid.equals(uid)) { - if (l.type == ALL || - (l.type == COL && l.value.equals(lockvalue.split(",")[0])) || - (l.type == CELL && l.value.equals(lockvalue))) { - blocker = l; - break; - } - } else { - if (l.type == ALL || - (l.type == COL && l.value.equals(lockvalue.split(",")[0])) || - (l.type == CELL && l.value.equals(lockvalue))) { - lock = l; - break; - } - } - } - } - } finally { - cursor.close(); - } - - if (blocker != null) { - logger.info("found a blocking lock {}", lockToString(blocker)); - throw new RuntimeException("Can't obtain lock, it is blocked by a type '" + blocker.type + "' lock owned by '" + blocker.uid + "'"); - } - - if (lock == null) { - logger.info("no comparable lock already exists, creating a new one"); - lock = new Lock(Long.toHexString(txn.getId()), pid, uid, locktype, lockvalue); - lockById.put(txn, lock); - txn.commit(); - } - - } finally { - if (txn != null) { - txn.abort(); - txn = null; - } - } - - JSONObject o = lockToJSON(lock, uid); - o.put("status", "ok"); - respond(response, o); - - logger.trace("< obtain lock"); - } - - @Override - protected void releaseLock(HttpServletResponse response, String pid, String uid, String lid) throws Exception { - - Transaction txn = env.beginTransaction(null, null); - - try { - Lock lock = getLock(lid, pid, uid); - if (lock != null) { - if (!lock.uid.equals(uid)) { - throw new RuntimeException("User id doesn't match the lock owner, can't release the lock"); - } - lockById.delete(lid); - txn.commit(); - } - } finally { - if (txn != null) { - txn.abort(); - txn = null; - } - } - - if (response != null) { // this because the expiration thread can call this method without a real response - respond(response, OK); - } - } - - // 
---------------------------------------------------------------------------------------------------- - - @Override - protected void startProject(HttpServletResponse response, String pid, String uid, String lid, byte[] data, String metadata, List transformations) throws Exception { - - Transaction txn = env.beginTransaction(null, null); - - try { - if (projectById.contains(pid)) { - throw new RuntimeException("Project '" + pid + "' already exists"); - } - - Lock lock = getLock(lid, pid, uid); - - if (lock.type != ALL) { - throw new RuntimeException("The lock you have is not enough to start a project"); - } - - projectById.put(txn, new Project(pid, data, metadata, transformations)); - txn.commit(); - } finally { - if (txn != null) { - txn.abort(); - txn = null; - } - } - - respond(response, OK); - } - - @Override - protected void addTransformations(HttpServletResponse response, String pid, String uid, String lid, List transformations) throws Exception { - - Transaction txn = env.beginTransaction(null, null); - - try { - Project project = getProject(pid); - - if (project == null) { - throw new RuntimeException("Project '" + pid + "' not found"); - } - - Lock lock = getLock(lid, pid, uid); - - logger.info("obtained lock: {}", lockToString(lock)); - - if (lock.type == ALL) { - project.transformations.addAll(transformations); - } else { - for (String s : transformations) { - JSONObject o = new JSONObject(s); - - int type = o.getInt("op_type"); - String value = o.getString("op_value"); - if (lock.type == COL) { - if (type == COL) { - if (value != null && value.equals(lock.value)) { - project.transformations.add(s); - } else { - throw new RuntimeException("Can't apply '" + s + "': you have a lock for column '" + lock.value + "' and you're attempting to modify column '" + value + "'."); - } - } else if (type == CELL) { - String column = value.split(",")[0]; - if (column != null && column.equals(lock.value)) { - project.transformations.add(s); - } else { - throw new 
RuntimeException("Can't apply '" + s + "': you have a lock for column '" + lock.value + "' and you're attempting to modify cell '" + value + "' in another column."); - } - } - } else if (lock.type == CELL) { - if (type == COL) { - throw new RuntimeException("Can't apply '" + s + "': you offered a lock for a single cell and you're attempting an operation for the entire column."); - } else if (type == CELL) { - if (value != null && value.equals(lock.value)) { - project.transformations.add(s); - } else { - throw new RuntimeException("Can't apply '" + s + "': you have a lock for cell '" + lock.value + "' and you're attempting to modify cell '" + value + "'."); - } - } - } - } - } - - projectById.put(txn, project); - - txn.commit(); - } finally { - if (txn != null) { - txn.abort(); - txn = null; - } - } - - respond(response, OK); - } - - // --------------------------------------------------------------------------------- - - @Override - protected void openProject(HttpServletResponse response, String pid) throws Exception { - Project project = getProject(pid); - - Writer w = response.getWriter(); - JSONWriter writer = new JSONWriter(w); - writer.object(); - writer.key("status"); writer.value("ok"); - writer.key("data"); writer.value(project.data); - writer.key("metadata"); writer.value(new JSONObject(project.metadata)); - writer.key("transformations"); - writer.array(); - for (String s : project.transformations) { - writer.value(new JSONObject(s)); - } - writer.endArray(); - writer.endObject(); - w.flush(); - w.close(); - } - - // --------------------------------------------------------------------------------- - - @Override - protected void getState(HttpServletResponse response, String pid, String uid, int rev) throws Exception { - - Project project = getProject(pid); - - Writer w = response.getWriter(); - JSONWriter writer = new JSONWriter(w); - - writer.object(); - writer.key("status"); writer.value("ok"); - writer.key("transformations"); - writer.array(); - int size 
= project.transformations.size(); - for (int i = rev; i < size; i++) { - writer.value(new JSONObject(project.transformations.get(i))); - } - writer.endArray(); - - EntityCursor cursor = locksByProject.subIndex(pid).entities(); - - try { - writer.key("locks"); - writer.array(); - for (Lock lock : cursor) { - writer.value(lockToJSON(lock, uid)); - } - writer.endArray(); - writer.endObject(); - - w.flush(); - w.close(); - } finally { - cursor.close(); - } - } - - // --------------------------------------------------------------------------------- - - Project getProject(String pid) { - Project project = projectById.get(pid); - if (project == null) { - throw new RuntimeException("Project '" + pid + "' could not be found: are you sure is not managed by another broker?"); - } - return project; - } - - @Entity - static class Project { - - @PrimaryKey - String pid; - - List transformations; - - byte[] data; - - String metadata; - - int rev; - - Project(String pid, byte[] data, String metadata, List transformations) { - this.pid = pid; - this.data = data; - this.metadata = metadata; - this.transformations = (transformations != null) ? 
transformations : new ArrayList(); - this.rev = this.transformations.size(); - } - - @SuppressWarnings("unused") - private Project() {} - } - - // --------------------------------------------------------------------------------- - - Lock getLock(String lid, String pid, String uid) { - Lock lock = lockById.get(lid); - checkLock(lock, lid, pid, uid); - return lock; - } - - void checkLock(Lock lock, String lid, String pid, String uid) { - if (lock == null) { - throw new RuntimeException("No lock was found with the given Lock id '" + lid + "', you have to have a valid lock on a project in order to start it"); - } - - if (!lock.pid.equals(pid)) { - throw new RuntimeException("Lock '" + lock.id + "' is for another project: " + lock.pid); - } - - if (!lock.uid.equals(uid)) { - throw new RuntimeException("Lock '" + lock.id + "' is owned by another user: " + lock.uid); - } - } - - Lock getLock(String pid, String uid, int locktype) { - Lock lock = null; - EntityCursor cursor = locksByProject.subIndex(pid).entities(); - - try { - for (Lock l : cursor) { - if (uid.equals(l.uid) && (locktype == l.type)) { - lock = l; - break; - } - } - } finally { - cursor.close(); - } - - return lock; - } - - JSONObject lockToJSON(Lock lock, String uid) throws JSONException { - JSONObject o = new JSONObject(); - if (lock != null) { - // NOTE: only the owner of the lock should get the ID, - // otherwise others can just fake ownership of other people's locks - if (lock.uid.equals(uid)) { - o.put("lock", lock.id); - } - - o.put("pid", lock.pid); - o.put("uid", lock.uid); - o.put("type", lock.type); - o.put("value", lock.value); - o.put("timestamp", lock.timestamp); - } - return o; - } - - String lockToString(Lock lock) { - return lock.id + "," + lock.pid + "," + lock.uid + "," + lock.type + "," + lock.value; - } - - @Entity - static class Lock { - - @PrimaryKey - String id; - - @SecondaryKey(relate=MANY_TO_ONE) - String pid; - - String uid; - - int type; - - String value; - - long timestamp; - - 
Lock(String id, String pid, String uid, int type, String value) { - this.id = id; - this.pid = pid; - this.uid = uid; - this.type = type; - this.value = value; - this.timestamp = System.currentTimeMillis(); - } - - @SuppressWarnings("unused") - private Lock() {} - } -} diff --git a/broker/core/src/com/google/refine/broker/GridworksBroker.java b/broker/core/src/com/google/refine/broker/GridworksBroker.java new file mode 100644 index 000000000..6052b98f3 --- /dev/null +++ b/broker/core/src/com/google/refine/broker/GridworksBroker.java @@ -0,0 +1,311 @@ + +package com.google.refine.broker; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.http.NameValuePair; +import org.apache.http.client.HttpClient; +import org.apache.http.client.ResponseHandler; +import org.apache.http.client.entity.UrlEncodedFormEntity; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.impl.client.BasicResponseHandler; +import org.apache.http.message.BasicNameValuePair; +import org.apache.http.params.CoreProtocolPNames; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import edu.mit.simile.butterfly.ButterflyModuleImpl; + +/** + * This class contains all the code shared by various implementations of a Gridworks Broker. + * + * A broker is a server used by multiple Gridworks installations to enable collaborative + * development over the same project. 
+ * + * Broker implementations differ in how they store their state but all of them are required + * to extend this abstract class and implement the services that are called via HTTP. + * + */ +public abstract class GridworksBroker extends ButterflyModuleImpl { + + static final public String GET_STATE = "get_state"; + static final public String EXPIRE = "expire"; + static final public String OBTAIN_LOCK = "obtain_lock"; + static final public String RELEASE_LOCK = "release_lock"; + static final public String TRANSFORM = "transform"; + static final public String START = "start"; + static final public String OPEN = "open"; + + static final public int ALL = 0; + static final public int COL = 1; + static final public int CELL = 2; + + static final protected Logger logger = LoggerFactory.getLogger("gridworks.broker"); + + static final protected String USER_INFO_URL = "http://www.freebase.com/api/service/user_info"; + static final protected String DELEGATED_OAUTH_HEADER = "X-Freebase-Credentials"; + static final protected String OAUTH_HEADER = "Authorization"; + + static protected String OK; + + static { + try { + JSONObject o = new JSONObject(); + o.put("status","ok"); + OK = o.toString(); + } catch (JSONException e) { + // not going to happen; + } + } + + static public final long LOCK_DURATION = 60 * 1000; // 1 minute + static public final long USER_DURATION = 5 * 60 * 1000; // 1 minute + static public final long LOCK_EXPIRATION_CHECK_DELAY = 5 * 1000; // 5 seconds + + protected HttpClient httpclient; + + protected boolean developmentMode; + + @Override + public void init(ServletConfig config) throws Exception { + super.init(config); + httpclient = getHttpClient(); + developmentMode = Boolean.parseBoolean(config.getInitParameter("gridworks.development")); + if (developmentMode) logger.warn("Running in development mode"); + } + + @Override + public void destroy() throws Exception { + httpclient.getConnectionManager().shutdown(); + } + + @Override + public boolean 
process(String path, HttpServletRequest request, HttpServletResponse response) throws Exception { + if (logger.isDebugEnabled()) { + logger.debug("> process '{}'", path); + } else { + logger.info("process '{}'", path); + } + + try { + + if (GET_STATE.equals(path)) { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + getState(response, getParameter(request, "pid"), getUserId(request), getInteger(request, "rev")); + } else if (EXPIRE.equals(path)) { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + expire(response); + } else if (OBTAIN_LOCK.equals(path)) { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + obtainLock(response, getParameter(request, "pid"), getUserId(request), getInteger(request, "locktype"), getParameter(request, "lockvalue")); + } else if (RELEASE_LOCK.equals(path)) { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + releaseLock(response, getParameter(request, "pid"), getUserId(request), getParameter(request, "lock")); + } else if (TRANSFORM.equals(path)) { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + addTransformations(response, getParameter(request, "pid"), getUserId(request), getParameter(request, "lock"), getList(request, "transformations")); + } else if (START.equals(path)) { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + startProject(response, getParameter(request, "pid"), getUserId(request), getParameter(request, "lock"), getData(request), getParameter(request, "metadata"), getList(request, "transformations")); + } else if (OPEN.equals(path)) { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + openProject(response, getParameter(request, "pid")); + } else { + boolean value = 
super.process(path, request, response); + if (logger.isDebugEnabled()) logger.debug("< process '{}'", path); + return value; + } + + } catch (RuntimeException e) { + logger.error("runtime error", e.getMessage()); + respondError(response, e.getMessage()); + } catch (Exception e) { + logger.error("internal error", e); + respondException(response, e); + } + + if (logger.isDebugEnabled()) logger.debug("< process '{}'", path); + + return true; + } + + // ---------------------------------------------------------------------------------------- + + protected abstract HttpClient getHttpClient(); + + protected abstract void expire(HttpServletResponse response) throws Exception; + + protected abstract void getState(HttpServletResponse response, String pid, String uid, int rev) throws Exception; + + protected abstract void obtainLock(HttpServletResponse response, String pid, String uid, int locktype, String lockvalue) throws Exception; + + protected abstract void releaseLock(HttpServletResponse response, String pid, String uid, String lock) throws Exception; + + protected abstract void startProject(HttpServletResponse response, String pid, String uid, String lock, byte[] data, String metadata, List transformations) throws Exception; + + protected abstract void addTransformations(HttpServletResponse response, String pid, String uid, String lock, List transformations) throws Exception; + + protected abstract void openProject(HttpServletResponse response, String pid) throws Exception; + + // ---------------------------------------------------------------------------------------- + + @SuppressWarnings("unchecked") + protected String getUserId(HttpServletRequest request) throws Exception { + + // This is useful for testing + if (developmentMode) { + return getParameter(request, "uid"); + } + + String oauth = request.getHeader(DELEGATED_OAUTH_HEADER); + if (oauth == null) { + throw new RuntimeException("The request needs to contain the '" + DELEGATED_OAUTH_HEADER + "' header set to 
obtain user identity via Freebase."); + } + + List formparams = new ArrayList(); + Map params = (Map) request.getParameterMap(); + for (Entry e : params.entrySet()) { + formparams.add(new BasicNameValuePair((String) e.getKey(), request.getParameter((String) e.getKey()))); /* getParameterMap() values are String[], so read the first value via getParameter() instead of casting the entry value to String */ + } + UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8"); + + HttpPost httpRequest = new HttpPost(USER_INFO_URL); + httpRequest.setHeader(OAUTH_HEADER, oauth); + httpRequest.getParams().setParameter(CoreProtocolPNames.USER_AGENT, "Gridworks Broker"); + httpRequest.setEntity(entity); + + ResponseHandler responseHandler = new BasicResponseHandler(); + String responseBody = httpclient.execute(httpRequest, responseHandler); + JSONObject o = new JSONObject(responseBody); + + return o.getString("username"); + } + + // ---------------------------------------------------------------------------------------- + + static protected String getParameter(HttpServletRequest request, String name) throws ServletException { + String param = request.getParameter(name); + if (param == null) { + throw new RuntimeException("request must come with a '" + name + "' parameter"); + } + return param; + } + + static protected List getList(HttpServletRequest request, String name) throws ServletException, JSONException { + String param = getParameter(request, name); + JSONArray a = new JSONArray(param); + List result = new ArrayList(a.length()); + for (int i = 0; i < a.length(); i++) { + result.add(a.getString(i)); + } + return result; + } + + static protected int getInteger(HttpServletRequest request, String name) throws ServletException, JSONException { + return Integer.parseInt(getParameter(request, name)); + } + + static protected byte[] getData(HttpServletRequest request) throws ServletException, IOException { + ByteArrayOutputStream output = new ByteArrayOutputStream(); + InputStream input = request.getInputStream(); + byte[] buffer = new byte[4096]; + int count = 0; + int n = 0; + while (-1 != (n =
input.read(buffer))) { + output.write(buffer, 0, n); + count += n; + } + return output.toByteArray(); + + } + + static protected void respondError(HttpServletResponse response, String error) throws IOException, ServletException { + + if (response == null) { + throw new ServletException("Response object can't be null"); + } + + try { + JSONObject o = new JSONObject(); + o.put("status", "error"); + o.put("message", error); + response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + respond(response, o.toString()); + } catch (JSONException e) { + e.printStackTrace(response.getWriter()); + } + } + + static protected void respondException(HttpServletResponse response, Exception e) throws IOException, ServletException { + + if (response == null) { + throw new ServletException("Response object can't be null"); + } + + try { + JSONObject o = new JSONObject(); + o.put("status", "error"); + o.put("message", e.getMessage()); + + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + e.printStackTrace(pw); + pw.flush(); + sw.flush(); + + o.put("stack", sw.toString()); + + response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + respond(response, o.toString()); + } catch (JSONException e1) { + e.printStackTrace(response.getWriter()); + } + } + + static protected void respond(HttpServletResponse response, JSONObject content) throws IOException, ServletException { + if (content == null) { + throw new ServletException("Content object can't be null"); + } + + respond(response, content.toString()); + } + + static protected void respond(HttpServletResponse response, String content) throws IOException, ServletException { + if (response == null) { + throw new ServletException("Response object can't be null"); + } + + Writer w = response.getWriter(); + if (w != null) { + w.write(content); + w.flush(); + w.close(); + } else { + throw new ServletException("response returned a null writer"); + } + } +} diff --git 
a/broker/core/src/com/google/refine/broker/GridworksBrokerImpl.java b/broker/core/src/com/google/refine/broker/GridworksBrokerImpl.java new file mode 100644 index 000000000..8b656513f --- /dev/null +++ b/broker/core/src/com/google/refine/broker/GridworksBrokerImpl.java @@ -0,0 +1,592 @@ +package com.google.refine.broker; + +import static com.sleepycat.persist.model.Relationship.MANY_TO_ONE; + +import java.io.File; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; +import java.util.Timer; +import java.util.TimerTask; + +import javax.servlet.ServletConfig; +import javax.servlet.http.HttpServletResponse; + +import org.apache.http.client.HttpClient; +import org.apache.http.impl.client.DefaultHttpClient; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.sleepycat.je.Environment; +import com.sleepycat.je.EnvironmentConfig; +import com.sleepycat.je.Transaction; +import com.sleepycat.persist.EntityCursor; +import com.sleepycat.persist.EntityStore; +import com.sleepycat.persist.PrimaryIndex; +import com.sleepycat.persist.SecondaryIndex; +import com.sleepycat.persist.StoreConfig; +import com.sleepycat.persist.model.Entity; +import com.sleepycat.persist.model.PrimaryKey; +import com.sleepycat.persist.model.SecondaryKey; + +public class GridworksBrokerImpl extends GridworksBroker { + + protected static final Logger logger = LoggerFactory.getLogger("gridworks.broker.local"); + + Environment env; + + EntityStore projectStore; + EntityStore lockStore; + EntityStore userStore; + + PrimaryIndex projectById; + PrimaryIndex lockById; + + SecondaryIndex locksByProject; + + Timer timer; + Expirer expirer; + + @Override + public void init(ServletConfig config) throws Exception { + logger.trace("> init"); + super.init(config); + + timer = new Timer(); + expirer = new Expirer(); + timer.schedule(expirer, 0, LOCK_EXPIRATION_CHECK_DELAY); + + String 
dataDir = config.getInitParameter("gridworks.data"); + if (dataDir == null) dataDir = "data"; + File dataPath = new File(dataDir); + if (!dataPath.exists()) dataPath.mkdirs(); + + EnvironmentConfig envConfig = new EnvironmentConfig(); + envConfig.setAllowCreate(true); + envConfig.setTransactional(true); + env = new Environment(dataPath, envConfig); + + StoreConfig storeConfig = new StoreConfig(); + storeConfig.setAllowCreate(true); + storeConfig.setTransactional(true); + projectStore = new EntityStore(env, "ProjectsStore", storeConfig); + lockStore = new EntityStore(env, "LockStore", storeConfig); + + projectById = projectStore.getPrimaryIndex(String.class, Project.class); + lockById = lockStore.getPrimaryIndex(String.class, Lock.class); + + locksByProject = lockStore.getSecondaryIndex(lockById, String.class, "pid"); + logger.trace("< init"); + } + + @Override + public void destroy() throws Exception { + logger.trace("> destroy"); + super.destroy(); + + if (projectStore != null) { + projectStore.close(); + projectById = null; + } + + if (lockStore != null) { + lockStore.close(); + lockById = null; + } + + if (timer != null) { + timer.cancel(); + timer.purge(); + timer = null; + } + + if (env != null) { + env.close(); + env = null; + } + logger.trace("< destroy"); + } + + class Expirer extends TimerTask { + public void run() { + if (lockById != null) { + logger.trace("> expire"); + Transaction txn = env.beginTransaction(null, null); + try { + EntityCursor cursor = lockById.entities(); + try { + for (Lock lock : cursor) { + if (lock.timestamp + LOCK_DURATION < System.currentTimeMillis()) { + logger.trace("Found expired lock {}", lock.id); + try { + releaseLock(null, lock.pid, lock.uid, lock.id); + } catch (Exception e) { + logger.error("Exception while expiring lock for project '" + lock.pid + "'", e); + } + } + } + } finally { + cursor.close(); + } + } finally { + if (txn != null) { + txn.abort(); + txn = null; + } + } + logger.trace("< expire"); + } + } + } + + // 
--------------------------------------------------------------------------------- + + @Override + protected HttpClient getHttpClient() { + return new DefaultHttpClient(); + } + + // --------------------------------------------------------------------------------- + + @Override + protected void expire(HttpServletResponse response) throws Exception { + expirer.run(); + respond(response, OK); + } + + @Override + protected void obtainLock(HttpServletResponse response, String pid, String uid, int locktype, String lockvalue) throws Exception { + logger.trace("> obtain lock"); + Lock lock = null; + Lock blocker = null; + + Transaction txn = env.beginTransaction(null, null); + + try { + + EntityCursor cursor = locksByProject.subIndex(pid).entities(); + + /* + * ALL + * blocked -> somebody else's lock + * reuse -> you already have an ALL lock + * new -> else + * + * COL + * blocked -> somebody else's all lock || a lock on the same col + * reuse -> you have an ALL lock || a lock on the same col + * new -> else + * + * CELL + * blocked -> somebody else's all lock || a lock on the same col || a lock on the same cell + * reuse -> you have a lock on the same cell + * yes -> (you have a lock on the same cell) && (nobody else has a lock on the same cell || the same col || all) + * new -> else + * + */ + + try { + if (locktype == ALL) { + if (lockvalue.length() > 0) { + throw new RuntimeException("Hmm, seems like you're calling an ALL with a specific value, are you sure you didn't want another type of lock?"); + } + + for (Lock l : cursor) { + if (!l.uid.equals(uid)) { + blocker = l; + break; + } else { + if (l.type == ALL) { + lock = l; + break; + } + } + } + } else if (locktype == COL) { + if (lockvalue.indexOf(',') > -1) { + throw new RuntimeException("Hmm, seems like you're calling a COL lock with a CELL value"); + } + + for (Lock l : cursor) { + if (!l.uid.equals(uid)) { + if (l.type == ALL || + (l.type == COL && l.value.equals(lockvalue)) || + (l.type == CELL && 
l.value.split(",")[0].equals(lockvalue))) { + blocker = l; + break; + } + } else { + if (l.type == ALL || + (l.type == COL && l.value.equals(lockvalue))) { + lock = l; + break; + } + } + } + } else if (locktype == CELL) { + if (lockvalue.indexOf(',') == -1) { + throw new RuntimeException("Hmm, seems like you're calling a CELL lock without specifying row and column: format must be 'row,column'"); + } + + for (Lock l : cursor) { + if (!l.uid.equals(uid)) { + if (l.type == ALL || + (l.type == COL && l.value.equals(lockvalue.split(",")[0])) || + (l.type == CELL && l.value.equals(lockvalue))) { + blocker = l; + break; + } + } else { + if (l.type == ALL || + (l.type == COL && l.value.equals(lockvalue.split(",")[0])) || + (l.type == CELL && l.value.equals(lockvalue))) { + lock = l; + break; + } + } + } + } + } finally { + cursor.close(); + } + + if (blocker != null) { + logger.info("found a blocking lock {}", lockToString(blocker)); + throw new RuntimeException("Can't obtain lock, it is blocked by a type '" + blocker.type + "' lock owned by '" + blocker.uid + "'"); + } + + if (lock == null) { + logger.info("no comparable lock already exists, creating a new one"); + lock = new Lock(Long.toHexString(txn.getId()), pid, uid, locktype, lockvalue); + lockById.put(txn, lock); + txn.commit(); + } + + } finally { + if (txn != null) { + txn.abort(); + txn = null; + } + } + + JSONObject o = lockToJSON(lock, uid); + o.put("status", "ok"); + respond(response, o); + + logger.trace("< obtain lock"); + } + + @Override + protected void releaseLock(HttpServletResponse response, String pid, String uid, String lid) throws Exception { + + Transaction txn = env.beginTransaction(null, null); + + try { + Lock lock = getLock(lid, pid, uid); + if (lock != null) { + if (!lock.uid.equals(uid)) { + throw new RuntimeException("User id doesn't match the lock owner, can't release the lock"); + } + lockById.delete(lid); + txn.commit(); + } + } finally { + if (txn != null) { + txn.abort(); + txn = null; + 
} + } + + if (response != null) { // this because the expiration thread can call this method without a real response + respond(response, OK); + } + } + + // ---------------------------------------------------------------------------------------------------- + + @Override + protected void startProject(HttpServletResponse response, String pid, String uid, String lid, byte[] data, String metadata, List transformations) throws Exception { + + Transaction txn = env.beginTransaction(null, null); + + try { + if (projectById.contains(pid)) { + throw new RuntimeException("Project '" + pid + "' already exists"); + } + + Lock lock = getLock(lid, pid, uid); + + if (lock.type != ALL) { + throw new RuntimeException("The lock you have is not enough to start a project"); + } + + projectById.put(txn, new Project(pid, data, metadata, transformations)); + txn.commit(); + } finally { + if (txn != null) { + txn.abort(); + txn = null; + } + } + + respond(response, OK); + } + + @Override + protected void addTransformations(HttpServletResponse response, String pid, String uid, String lid, List transformations) throws Exception { + + Transaction txn = env.beginTransaction(null, null); + + try { + Project project = getProject(pid); + + if (project == null) { + throw new RuntimeException("Project '" + pid + "' not found"); + } + + Lock lock = getLock(lid, pid, uid); + + logger.info("obtained lock: {}", lockToString(lock)); + + if (lock.type == ALL) { + project.transformations.addAll(transformations); + } else { + for (String s : transformations) { + JSONObject o = new JSONObject(s); + + int type = o.getInt("op_type"); + String value = o.getString("op_value"); + if (lock.type == COL) { + if (type == COL) { + if (value != null && value.equals(lock.value)) { + project.transformations.add(s); + } else { + throw new RuntimeException("Can't apply '" + s + "': you have a lock for column '" + lock.value + "' and you're attempting to modify column '" + value + "'."); + } + } else if (type == CELL) { 
+ String column = value.split(",")[0]; + if (column != null && column.equals(lock.value)) { + project.transformations.add(s); + } else { + throw new RuntimeException("Can't apply '" + s + "': you have a lock for column '" + lock.value + "' and you're attempting to modify cell '" + value + "' in another column."); + } + } + } else if (lock.type == CELL) { + if (type == COL) { + throw new RuntimeException("Can't apply '" + s + "': you offered a lock for a single cell and you're attempting an operation for the entire column."); + } else if (type == CELL) { + if (value != null && value.equals(lock.value)) { + project.transformations.add(s); + } else { + throw new RuntimeException("Can't apply '" + s + "': you have a lock for cell '" + lock.value + "' and you're attempting to modify cell '" + value + "'."); + } + } + } + } + } + + projectById.put(txn, project); + + txn.commit(); + } finally { + if (txn != null) { + txn.abort(); + txn = null; + } + } + + respond(response, OK); + } + + // --------------------------------------------------------------------------------- + + @Override + protected void openProject(HttpServletResponse response, String pid) throws Exception { + Project project = getProject(pid); + + Writer w = response.getWriter(); + JSONWriter writer = new JSONWriter(w); + writer.object(); + writer.key("status"); writer.value("ok"); + writer.key("data"); writer.value(project.data); + writer.key("metadata"); writer.value(new JSONObject(project.metadata)); + writer.key("transformations"); + writer.array(); + for (String s : project.transformations) { + writer.value(new JSONObject(s)); + } + writer.endArray(); + writer.endObject(); + w.flush(); + w.close(); + } + + // --------------------------------------------------------------------------------- + + @Override + protected void getState(HttpServletResponse response, String pid, String uid, int rev) throws Exception { + + Project project = getProject(pid); + + Writer w = response.getWriter(); + JSONWriter writer 
= new JSONWriter(w); + + writer.object(); + writer.key("status"); writer.value("ok"); + writer.key("transformations"); + writer.array(); + int size = project.transformations.size(); + for (int i = rev; i < size; i++) { + writer.value(new JSONObject(project.transformations.get(i))); + } + writer.endArray(); + + EntityCursor cursor = locksByProject.subIndex(pid).entities(); + + try { + writer.key("locks"); + writer.array(); + for (Lock lock : cursor) { + writer.value(lockToJSON(lock, uid)); + } + writer.endArray(); + writer.endObject(); + + w.flush(); + w.close(); + } finally { + cursor.close(); + } + } + + // --------------------------------------------------------------------------------- + + Project getProject(String pid) { + Project project = projectById.get(pid); + if (project == null) { + throw new RuntimeException("Project '" + pid + "' could not be found: are you sure is not managed by another broker?"); + } + return project; + } + + @Entity + static class Project { + + @PrimaryKey + String pid; + + List transformations; + + byte[] data; + + String metadata; + + int rev; + + Project(String pid, byte[] data, String metadata, List transformations) { + this.pid = pid; + this.data = data; + this.metadata = metadata; + this.transformations = (transformations != null) ? 
transformations : new ArrayList(); + this.rev = this.transformations.size(); + } + + @SuppressWarnings("unused") + private Project() {} + } + + // --------------------------------------------------------------------------------- + + Lock getLock(String lid, String pid, String uid) { + Lock lock = lockById.get(lid); + checkLock(lock, lid, pid, uid); + return lock; + } + + void checkLock(Lock lock, String lid, String pid, String uid) { + if (lock == null) { + throw new RuntimeException("No lock was found with the given Lock id '" + lid + "', you have to have a valid lock on a project in order to start it"); + } + + if (!lock.pid.equals(pid)) { + throw new RuntimeException("Lock '" + lock.id + "' is for another project: " + lock.pid); + } + + if (!lock.uid.equals(uid)) { + throw new RuntimeException("Lock '" + lock.id + "' is owned by another user: " + lock.uid); + } + } + + Lock getLock(String pid, String uid, int locktype) { + Lock lock = null; + EntityCursor cursor = locksByProject.subIndex(pid).entities(); + + try { + for (Lock l : cursor) { + if (uid.equals(l.uid) && (locktype == l.type)) { + lock = l; + break; + } + } + } finally { + cursor.close(); + } + + return lock; + } + + JSONObject lockToJSON(Lock lock, String uid) throws JSONException { + JSONObject o = new JSONObject(); + if (lock != null) { + // NOTE: only the owner of the lock should get the ID, + // otherwise others can just fake ownership of other people's locks + if (lock.uid.equals(uid)) { + o.put("lock", lock.id); + } + + o.put("pid", lock.pid); + o.put("uid", lock.uid); + o.put("type", lock.type); + o.put("value", lock.value); + o.put("timestamp", lock.timestamp); + } + return o; + } + + String lockToString(Lock lock) { + return lock.id + "," + lock.pid + "," + lock.uid + "," + lock.type + "," + lock.value; + } + + @Entity + static class Lock { + + @PrimaryKey + String id; + + @SecondaryKey(relate=MANY_TO_ONE) + String pid; + + String uid; + + int type; + + String value; + + long timestamp; + + 
Lock(String id, String pid, String uid, int type, String value) { + this.id = id; + this.pid = pid; + this.uid = uid; + this.type = type; + this.value = value; + this.timestamp = System.currentTimeMillis(); + } + + @SuppressWarnings("unused") + private Lock() {} + } +} diff --git a/broker/core/tests/src/com/google/gridworks/broker/tests/GridworksBrokerTests.java b/broker/core/tests/src/com/google/gridworks/broker/tests/GridworksBrokerTests.java deleted file mode 100644 index f1a8a6e61..000000000 --- a/broker/core/tests/src/com/google/gridworks/broker/tests/GridworksBrokerTests.java +++ /dev/null @@ -1,452 +0,0 @@ -package com.google.gridworks.broker.tests; - -import static com.google.gridworks.broker.GridworksBroker.*; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; - -import javax.servlet.ServletConfig; -import javax.servlet.ServletInputStream; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.AfterSuite; -import org.testng.annotations.AfterTest; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeSuite; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import com.google.gridworks.broker.GridworksBroker; -import com.google.gridworks.broker.GridworksBrokerImpl; - -public class GridworksBrokerTests { - - Logger logger; - File data; - - @BeforeSuite public void suite_init() { - System.setProperty("log4j.configuration", "tests.log4j.properties"); - data = new File("data"); - if (!data.exists()) data.mkdirs(); - } - - @AfterSuite 
public void suite_destroy() { - for (File f : data.listFiles()) { - f.delete(); - } - data.delete(); - } - - // ------------------------------------------------------------------------------------ - - ServletConfig config = null; - GridworksBroker broker = null; - - @BeforeTest public void test_init() throws Exception { - logger = LoggerFactory.getLogger(this.getClass()); - config = mock(ServletConfig.class); - when(config.getInitParameter("gridworks.data")).thenReturn(data.getAbsolutePath()); - when(config.getInitParameter("gridworks.development")).thenReturn("true"); - - broker = new GridworksBrokerImpl(); - broker.init(config); - } - - @AfterTest public void test_destroy() throws Exception { - broker.destroy(); - broker = null; - config = null; - } - - // ------------------------------------------------------------------------------------ - - HttpServletRequest request = null; - HttpServletResponse response = null; - StringWriter writer = null; - - @BeforeMethod public void setup() throws Exception { - request = mock(HttpServletRequest.class); - response = mock(HttpServletResponse.class); - } - - @AfterMethod public void teardown() throws Exception { - response = null; - request = null; - } - - // ------------------------------------------------------------------------------------ - - @Test public void testLifeCycle() { - Assert.assertTrue(true); - } - - @Test public void testService() { - try { - success(broker, request, response, EXPIRE); - } catch (Exception e) { - Assert.fail(); - } - } - - @Test public void testObtainLockFailure() { - try { - failure(broker, request, response, OBTAIN_LOCK); - } catch (Exception e) { - Assert.fail(); - } - } - - @Test public void testReleaseLockFailure() { - try { - failure(broker, request, response, RELEASE_LOCK); - } catch (Exception e) { - Assert.fail(); - } - } - - @Test public void testGetStateFailure() { - try { - failure(broker, request, response, GET_STATE, "pid", "project1934983948", "uid", "testuser", "rev", "0"); 
- } catch (Exception e) { - Assert.fail(); - } - } - - @Test public void testBrokenAllLockFailure() { - try { - failure(broker, request, response, OBTAIN_LOCK, "pid", "project", "uid", "testuser", "locktype", Integer.toString(ALL), "lockvalue", "1"); - } catch (Exception e) { - Assert.fail(); - } - } - - @Test public void testBrokenColLockFailure() { - try { - failure(broker, request, response, OBTAIN_LOCK, "pid", "project", "uid", "testuser", "locktype", Integer.toString(COL), "lockvalue", "1,1"); - } catch (Exception e) { - Assert.fail(); - } - } - - @Test public void testBrokenCellLockFailure() { - try { - failure(broker, request, response, OBTAIN_LOCK, "pid", "project", "uid", "testuser", "locktype", Integer.toString(CELL), "lockvalue", "1"); - } catch (Exception e) { - Assert.fail(); - } - } - - @Test public void testLockSimple() { - String project = "proj0"; - String user = "testuser"; - - try { - logger.info("--- obtain ALL lock on project ---"); - JSONObject result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); - assertJSON(result, "uid", "testuser"); - String lock = result.getString("lock"); - - logger.info("--- obtain ALL lock on project ---"); - success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); - - logger.info("--- obtain COL lock on project ---"); - result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", "1"); - assertJSON(result, "uid", "testuser"); - lock = result.getString("lock"); - - logger.info("--- release COL lock on project ---"); - success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); - - logger.info("--- obtain CELL lock on project ---"); - result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); - assertJSON(result, 
"uid", "testuser"); - lock = result.getString("lock"); - - logger.info("--- release CELL lock on project ---"); - success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); - } catch (Exception e) { - Assert.fail(); - } - } - - @Test public void testLocksAllBlocks() { - String project = "proj1"; - String user = "testuser"; - String user2 = "testuser2"; - - try { - logger.info("--- obtain ALL lock on project ---"); - JSONObject result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); - assertJSON(result, "uid", user); - String lock = result.getString("lock"); - - logger.info("--- another using asking for any lock will fail ---"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(ALL), "lockvalue", ""); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", "1"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); - - logger.info("--- same user asking for lower capable locks will return the ALL one ---"); - result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", "1"); - String lock2 = result.getString("lock"); - Assert.assertEquals(lock, lock2); - - result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); - lock2 = result.getString("lock"); - Assert.assertEquals(lock, lock2); - - logger.info("--- release the ALL lock ---"); - success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); - } catch (Exception e) { - Assert.fail(); - } - } - - @Test public void testLocksColBlocks() { - String project = "proj2"; - String user = "testuser"; - String 
user2 = "testuser2"; - - try { - logger.info("--- obtain COL lock on project ---"); - JSONObject result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", "1"); - String lock = result.getString("lock"); - - logger.info("--- other user must fail to obtain lock on the same COL or ALL ---"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(ALL), "lockvalue", ""); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", "1"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); - - logger.info("--- but succeed in getting a COL lock on another column or cell ---"); - result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", "2"); - String lock2 = result.getString("lock"); - - logger.info("--- now it's our first user's turn to fail to get lock ---"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", "2"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(CELL), "lockvalue", "2,1"); - - logger.info("--- release the locks ---"); - success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); - success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user2, "lock", lock2); - } catch (Exception e) { - Assert.fail(); - } - } - - @Test public void testLocksCellBlocks() { - String project = "proj3"; - String user = "testuser"; - String user2 = "testuser2"; - - try { - logger.info("--- obtain CELL lock on 
project ---"); - JSONObject result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); - String lock = result.getString("lock"); - - logger.info("--- other user must fail to obtain lock on the same CELL, COL or ALL ---"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(ALL), "lockvalue", ""); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", "1"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); - - logger.info("--- but succeed in getting a CELL lock on a cell in another column ---"); - result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(CELL), "lockvalue", "2,1"); - String lock2 = result.getString("lock"); - - logger.info("--- now it's our first user's turn to fail to get lock ---"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", "2"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(CELL), "lockvalue", "2,1"); - - logger.info("--- release the locks ---"); - success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); - success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user2, "lock", lock2); - } catch (Exception e) { - Assert.fail(); - } - } - - @Test public void testCompleteProjectLifeCycle() { - try { - String project = "proj4"; - String user = "testuser"; - String user2 = "testuser2"; - String data = "blah"; - String metadata = "{}"; - String transformations = "[]"; - String 
rev = "0"; - - logger.info("--- obtain ALL lock on project ---"); - JSONObject result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); - assertJSON(result, "uid", user); - String lock = result.getString("lock"); - - logger.info("--- start project ---"); - success(broker, request, response, START, "pid", project, "uid", user, "lock", lock, "data", data, "metadata", metadata, "transformations", transformations); - - logger.info("--- verify project state contains lock ---"); - result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", rev); - JSONArray locks = result.getJSONArray("locks"); - Assert.assertEquals(locks.length(), 1); - JSONObject l = locks.getJSONObject(0); - assertJSON(l, "uid", "testuser"); - Assert.assertEquals(l.getInt("type"), ALL); - - logger.info("--- release ALL lock on project ---"); - success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); - - logger.info("--- verify no locks are present ---"); - result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", rev); - locks = result.getJSONArray("locks"); - Assert.assertEquals(locks.length(), 0); - - logger.info("--- open project and verify data was loaded correctly ---"); - result = success(broker, request, response, OPEN, "pid", project, "uid", user, "rev", rev); - JSONArray result_data = result.getJSONArray("data"); - Assert.assertEquals(result_data.length(),data.getBytes("UTF-8").length); - - JSONArray tt; - JSONObject t; - - logger.info("--- obtain column lock ---"); - String column = "1"; - result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", column); - String col_lock = result.getString("lock"); - - logger.info("--- perform column transformation ---"); - t = new JSONObject(); - t.put("op_type", COL); - t.put("op_value", column); // 
operate on col 1 - t.put("value", new JSONObject()); - tt = new JSONArray(); - tt.put(t); - result = success(broker, request, response, TRANSFORM, "pid", project, "uid", user, "lock", col_lock, "transformations", tt.toString()); - - logger.info("--- make sure transformation was recorded properly ---"); - result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "0"); - tt = result.getJSONArray("transformations"); - Assert.assertEquals(tt.length(), 1); - t = tt.getJSONObject(0); - assertJSON(t, "op_value", column); - - logger.info("--- make sure revision numbers in state management work as expected ---"); - result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "1"); - tt = result.getJSONArray("transformations"); - Assert.assertEquals(tt.length(), 0); - - logger.info("--- perform cell transformation ---"); - String cell = "1"; - t = new JSONObject(); - t.put("op_type", CELL); - t.put("op_value", column + "," + cell); // operate on cell at row 1 column 1 - t.put("value", new JSONObject()); - tt = new JSONArray(); - tt.put(t); - result = success(broker, request, response, TRANSFORM, "pid", project, "uid", user, "lock", col_lock, "transformations", tt.toString()); - - logger.info("--- make sure transformation was recorded properly ---"); - result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "0"); - tt = result.getJSONArray("transformations"); - Assert.assertEquals(tt.length(), 2); - - result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "1"); - tt = result.getJSONArray("transformations"); - Assert.assertEquals(tt.length(), 1); - t = tt.getJSONObject(0); - assertJSON(t, "op_value", column + "," + cell); - - logger.info("--- make sure another user fails to acquire ALL lock ---"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(ALL), "lockvalue", ""); - - 
logger.info("--- make sure another user fails to acquire COL lock on the same column ---"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", column); - - logger.info("--- make sure another user manages to acquire COL lock on another column ---"); - String column2 = "2"; - result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", column2); - String col_lock2 = result.getString("lock"); - - logger.info("--- make sure that both locks are present ---"); - result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "2"); - locks = result.getJSONArray("locks"); - Assert.assertEquals(locks.length(), 2); - - logger.info("--- make sure we can't escalate our current COL lock to an ALL lock ---"); - failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); - - logger.info("--- release column locks ---"); - success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", col_lock); - success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user2, "lock", col_lock2); - - logger.info("--- make sure the project has no locks ---"); - result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "2"); - locks = result.getJSONArray("locks"); - Assert.assertEquals(locks.length(), 0); - - } catch (Exception e) { - Assert.fail(); - } - } - - // ------------------------------------------------------------------------------------ - - private void assertJSON(JSONObject o, String name, String value) throws JSONException { - Assert.assertEquals(o.get(name), value); - } - - private JSONObject success(GridworksBroker broker, HttpServletRequest request, HttpServletResponse response, String service, String... 
params) throws Exception { - return call(true, broker, request, response, service, params); - } - - private JSONObject failure(GridworksBroker broker, HttpServletRequest request, HttpServletResponse response, String service, String... params) throws Exception { - return call(false, broker, request, response, service, params); - } - - private JSONObject call(boolean successful, GridworksBroker broker, HttpServletRequest request, HttpServletResponse response, String service, String... params) throws Exception { - if (params != null) { - for (int i = 0; i < params.length; ) { - String name = params[i++]; - String value = params[i++]; - if ("data".equals(name)) { - final ByteArrayInputStream inputStream = new ByteArrayInputStream(value.getBytes("UTF-8")); - when(request.getInputStream()).thenReturn(new ServletInputStream() { - public int read() throws IOException { - return inputStream.read(); - } - }); - } else { - when(request.getParameter(name)).thenReturn(value); - } - } - } - - StringWriter writer = new StringWriter(); - when(response.getWriter()).thenReturn(new PrintWriter(writer)); - - broker.process(service, request, response); - - JSONObject result = new JSONObject(writer.toString()); - - if (successful) { - assertJSON(result, "status", "ok"); - } else { - assertJSON(result, "status", "error"); - } - - logger.info(result.toString()); - - return result; - } -} diff --git a/broker/core/tests/src/com/google/refine/broker/tests/GridworksBrokerTests.java b/broker/core/tests/src/com/google/refine/broker/tests/GridworksBrokerTests.java new file mode 100644 index 000000000..a4469985d --- /dev/null +++ b/broker/core/tests/src/com/google/refine/broker/tests/GridworksBrokerTests.java @@ -0,0 +1,452 @@ +package com.google.refine.broker.tests; + +import static com.google.refine.broker.GridworksBroker.*; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; 
+import java.io.PrintWriter; +import java.io.StringWriter; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletInputStream; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.AfterSuite; +import org.testng.annotations.AfterTest; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.broker.GridworksBroker; +import com.google.refine.broker.GridworksBrokerImpl; + +public class GridworksBrokerTests { + + Logger logger; + File data; + + @BeforeSuite public void suite_init() { + System.setProperty("log4j.configuration", "tests.log4j.properties"); + data = new File("data"); + if (!data.exists()) data.mkdirs(); + } + + @AfterSuite public void suite_destroy() { + for (File f : data.listFiles()) { + f.delete(); + } + data.delete(); + } + + // ------------------------------------------------------------------------------------ + + ServletConfig config = null; + GridworksBroker broker = null; + + @BeforeTest public void test_init() throws Exception { + logger = LoggerFactory.getLogger(this.getClass()); + config = mock(ServletConfig.class); + when(config.getInitParameter("gridworks.data")).thenReturn(data.getAbsolutePath()); + when(config.getInitParameter("gridworks.development")).thenReturn("true"); + + broker = new GridworksBrokerImpl(); + broker.init(config); + } + + @AfterTest public void test_destroy() throws Exception { + broker.destroy(); + broker = null; + config = null; + } + + // ------------------------------------------------------------------------------------ + + HttpServletRequest request = null; + 
HttpServletResponse response = null; + StringWriter writer = null; + + @BeforeMethod public void setup() throws Exception { + request = mock(HttpServletRequest.class); + response = mock(HttpServletResponse.class); + } + + @AfterMethod public void teardown() throws Exception { + response = null; + request = null; + } + + // ------------------------------------------------------------------------------------ + + @Test public void testLifeCycle() { + Assert.assertTrue(true); + } + + @Test public void testService() { + try { + success(broker, request, response, EXPIRE); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test public void testObtainLockFailure() { + try { + failure(broker, request, response, OBTAIN_LOCK); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test public void testReleaseLockFailure() { + try { + failure(broker, request, response, RELEASE_LOCK); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test public void testGetStateFailure() { + try { + failure(broker, request, response, GET_STATE, "pid", "project1934983948", "uid", "testuser", "rev", "0"); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test public void testBrokenAllLockFailure() { + try { + failure(broker, request, response, OBTAIN_LOCK, "pid", "project", "uid", "testuser", "locktype", Integer.toString(ALL), "lockvalue", "1"); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test public void testBrokenColLockFailure() { + try { + failure(broker, request, response, OBTAIN_LOCK, "pid", "project", "uid", "testuser", "locktype", Integer.toString(COL), "lockvalue", "1,1"); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test public void testBrokenCellLockFailure() { + try { + failure(broker, request, response, OBTAIN_LOCK, "pid", "project", "uid", "testuser", "locktype", Integer.toString(CELL), "lockvalue", "1"); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test public void testLockSimple() { + String project = "proj0"; + String user = 
"testuser"; + + try { + logger.info("--- obtain ALL lock on project ---"); + JSONObject result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); + assertJSON(result, "uid", "testuser"); + String lock = result.getString("lock"); + + logger.info("--- release ALL lock on project ---"); + success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); + + logger.info("--- obtain COL lock on project ---"); + result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", "1"); + assertJSON(result, "uid", "testuser"); + lock = result.getString("lock"); + + logger.info("--- release COL lock on project ---"); + success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); + + logger.info("--- obtain CELL lock on project ---"); + result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); + assertJSON(result, "uid", "testuser"); + lock = result.getString("lock"); + + logger.info("--- release CELL lock on project ---"); + success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test public void testLocksAllBlocks() { + String project = "proj1"; + String user = "testuser"; + String user2 = "testuser2"; + + try { + logger.info("--- obtain ALL lock on project ---"); + JSONObject result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); + assertJSON(result, "uid", user); + String lock = result.getString("lock"); + + logger.info("--- another using asking for any lock will fail ---"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(ALL), "lockvalue", ""); + 
failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", "1"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); + + logger.info("--- same user asking for lower capable locks will return the ALL one ---"); + result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", "1"); + String lock2 = result.getString("lock"); + Assert.assertEquals(lock, lock2); + + result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); + lock2 = result.getString("lock"); + Assert.assertEquals(lock, lock2); + + logger.info("--- release the ALL lock ---"); + success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test public void testLocksColBlocks() { + String project = "proj2"; + String user = "testuser"; + String user2 = "testuser2"; + + try { + logger.info("--- obtain COL lock on project ---"); + JSONObject result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", "1"); + String lock = result.getString("lock"); + + logger.info("--- other user must fail to obtain lock on the same COL or ALL ---"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(ALL), "lockvalue", ""); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", "1"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); + + logger.info("--- but succeed in getting a COL lock on another column or cell ---"); + result = success(broker, 
request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", "2"); + String lock2 = result.getString("lock"); + + logger.info("--- now it's our first user's turn to fail to get lock ---"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", "2"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(CELL), "lockvalue", "2,1"); + + logger.info("--- release the locks ---"); + success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); + success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user2, "lock", lock2); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test public void testLocksCellBlocks() { + String project = "proj3"; + String user = "testuser"; + String user2 = "testuser2"; + + try { + logger.info("--- obtain CELL lock on project ---"); + JSONObject result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); + String lock = result.getString("lock"); + + logger.info("--- other user must fail to obtain lock on the same CELL, COL or ALL ---"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(ALL), "lockvalue", ""); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", "1"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(CELL), "lockvalue", "1,1"); + + logger.info("--- but succeed in getting a CELL lock on a cell in another column ---"); + result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, 
"locktype", Integer.toString(CELL), "lockvalue", "2,1"); + String lock2 = result.getString("lock"); + + logger.info("--- now it's our first user's turn to fail to get lock ---"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", "2"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(CELL), "lockvalue", "2,1"); + + logger.info("--- release the locks ---"); + success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); + success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user2, "lock", lock2); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test public void testCompleteProjectLifeCycle() { + try { + String project = "proj4"; + String user = "testuser"; + String user2 = "testuser2"; + String data = "blah"; + String metadata = "{}"; + String transformations = "[]"; + String rev = "0"; + + logger.info("--- obtain ALL lock on project ---"); + JSONObject result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); + assertJSON(result, "uid", user); + String lock = result.getString("lock"); + + logger.info("--- start project ---"); + success(broker, request, response, START, "pid", project, "uid", user, "lock", lock, "data", data, "metadata", metadata, "transformations", transformations); + + logger.info("--- verify project state contains lock ---"); + result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", rev); + JSONArray locks = result.getJSONArray("locks"); + Assert.assertEquals(locks.length(), 1); + JSONObject l = locks.getJSONObject(0); + assertJSON(l, "uid", "testuser"); + Assert.assertEquals(l.getInt("type"), ALL); + + 
logger.info("--- release ALL lock on project ---"); + success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", lock); + + logger.info("--- verify no locks are present ---"); + result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", rev); + locks = result.getJSONArray("locks"); + Assert.assertEquals(locks.length(), 0); + + logger.info("--- open project and verify data was loaded correctly ---"); + result = success(broker, request, response, OPEN, "pid", project, "uid", user, "rev", rev); + JSONArray result_data = result.getJSONArray("data"); + Assert.assertEquals(result_data.length(),data.getBytes("UTF-8").length); + + JSONArray tt; + JSONObject t; + + logger.info("--- obtain column lock ---"); + String column = "1"; + result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(COL), "lockvalue", column); + String col_lock = result.getString("lock"); + + logger.info("--- perform column transformation ---"); + t = new JSONObject(); + t.put("op_type", COL); + t.put("op_value", column); // operate on col 1 + t.put("value", new JSONObject()); + tt = new JSONArray(); + tt.put(t); + result = success(broker, request, response, TRANSFORM, "pid", project, "uid", user, "lock", col_lock, "transformations", tt.toString()); + + logger.info("--- make sure transformation was recorded properly ---"); + result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "0"); + tt = result.getJSONArray("transformations"); + Assert.assertEquals(tt.length(), 1); + t = tt.getJSONObject(0); + assertJSON(t, "op_value", column); + + logger.info("--- make sure revision numbers in state management work as expected ---"); + result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "1"); + tt = result.getJSONArray("transformations"); + Assert.assertEquals(tt.length(), 0); + + logger.info("--- perform cell transformation 
---"); + String cell = "1"; + t = new JSONObject(); + t.put("op_type", CELL); + t.put("op_value", column + "," + cell); // operate on cell at row 1 column 1 + t.put("value", new JSONObject()); + tt = new JSONArray(); + tt.put(t); + result = success(broker, request, response, TRANSFORM, "pid", project, "uid", user, "lock", col_lock, "transformations", tt.toString()); + + logger.info("--- make sure transformation was recorded properly ---"); + result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "0"); + tt = result.getJSONArray("transformations"); + Assert.assertEquals(tt.length(), 2); + + result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "1"); + tt = result.getJSONArray("transformations"); + Assert.assertEquals(tt.length(), 1); + t = tt.getJSONObject(0); + assertJSON(t, "op_value", column + "," + cell); + + logger.info("--- make sure another user fails to acquire ALL lock ---"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(ALL), "lockvalue", ""); + + logger.info("--- make sure another user fails to acquire COL lock on the same column ---"); + failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", column); + + logger.info("--- make sure another user manages to acquire COL lock on another column ---"); + String column2 = "2"; + result = success(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user2, "locktype", Integer.toString(COL), "lockvalue", column2); + String col_lock2 = result.getString("lock"); + + logger.info("--- make sure that both locks are present ---"); + result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "2"); + locks = result.getJSONArray("locks"); + Assert.assertEquals(locks.length(), 2); + + logger.info("--- make sure we can't escalate our current COL lock to an ALL lock ---"); + 
failure(broker, request, response, OBTAIN_LOCK, "pid", project, "uid", user, "locktype", Integer.toString(ALL), "lockvalue", ""); + + logger.info("--- release column locks ---"); + success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user, "lock", col_lock); + success(broker, request, response, RELEASE_LOCK, "pid", project, "uid", user2, "lock", col_lock2); + + logger.info("--- make sure the project has no locks ---"); + result = success(broker, request, response, GET_STATE, "pid", project, "uid", user, "rev", "2"); + locks = result.getJSONArray("locks"); + Assert.assertEquals(locks.length(), 0); + + } catch (Exception e) { + Assert.fail(); + } + } + + // ------------------------------------------------------------------------------------ + + private void assertJSON(JSONObject o, String name, String value) throws JSONException { + Assert.assertEquals(o.get(name), value); + } + + private JSONObject success(GridworksBroker broker, HttpServletRequest request, HttpServletResponse response, String service, String... params) throws Exception { + return call(true, broker, request, response, service, params); + } + + private JSONObject failure(GridworksBroker broker, HttpServletRequest request, HttpServletResponse response, String service, String... params) throws Exception { + return call(false, broker, request, response, service, params); + } + + private JSONObject call(boolean successful, GridworksBroker broker, HttpServletRequest request, HttpServletResponse response, String service, String... 
params) throws Exception { + if (params != null) { + for (int i = 0; i < params.length; ) { + String name = params[i++]; + String value = params[i++]; + if ("data".equals(name)) { + final ByteArrayInputStream inputStream = new ByteArrayInputStream(value.getBytes("UTF-8")); + when(request.getInputStream()).thenReturn(new ServletInputStream() { + public int read() throws IOException { + return inputStream.read(); + } + }); + } else { + when(request.getParameter(name)).thenReturn(value); + } + } + } + + StringWriter writer = new StringWriter(); + when(response.getWriter()).thenReturn(new PrintWriter(writer)); + + broker.process(service, request, response); + + JSONObject result = new JSONObject(writer.toString()); + + if (successful) { + assertJSON(result, "status", "ok"); + } else { + assertJSON(result, "status", "error"); + } + + logger.info(result.toString()); + + return result; + } +} diff --git a/extensions/build.xml b/extensions/build.xml index 3804fc1be..0d77cc2f1 100644 --- a/extensions/build.xml +++ b/extensions/build.xml @@ -11,13 +11,11 @@ - - \ No newline at end of file diff --git a/extensions/jython/module/MOD-INF/controller.js b/extensions/jython/module/MOD-INF/controller.js index 5932fdaf3..159d1321e 100644 --- a/extensions/jython/module/MOD-INF/controller.js +++ b/extensions/jython/module/MOD-INF/controller.js @@ -1,10 +1,10 @@ function init() { // Packages.java.lang.System.err.println("Initializing jython extension"); - Packages.com.google.gridworks.expr.MetaParser.registerLanguageParser( + Packages.com.google.refine.expr.MetaParser.registerLanguageParser( "jython", "Jython", - Packages.com.google.gridworks.jython.JythonEvaluable.createParser(), + Packages.com.google.refine.jython.JythonEvaluable.createParser(), "return value" ); } diff --git a/extensions/jython/src/com/google/gridworks/jython/JythonEvaluable.java b/extensions/jython/src/com/google/gridworks/jython/JythonEvaluable.java deleted file mode 100644 index 5ea3aeb72..000000000 --- 
a/extensions/jython/src/com/google/gridworks/jython/JythonEvaluable.java +++ /dev/null @@ -1,131 +0,0 @@ -package com.google.gridworks.jython; - -import java.io.File; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Properties; - -import org.python.core.Py; -import org.python.core.PyException; -import org.python.core.PyFunction; -import org.python.core.PyNone; -import org.python.core.PyObject; -import org.python.core.PyString; -import org.python.util.PythonInterpreter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.HasFields; -import com.google.gridworks.expr.LanguageSpecificParser; -import com.google.gridworks.expr.ParsingException; - -public class JythonEvaluable implements Evaluable { - - static public LanguageSpecificParser createParser() { - return new LanguageSpecificParser() { - - public Evaluable parse(String s) throws ParsingException { - return new JythonEvaluable(s); - } - }; - } - - private static final String s_functionName = "___temp___"; - - private static PythonInterpreter _engine; - - // FIXME(SM): this initialization logic depends on the fact that the JVM's - // current working directory is the root of the Gridworks distributions - // or the development checkouts. While this works in practice, it would - // be preferable to have a more reliable address space, but since we - // don't have access to the servlet context from this class this is - // the best we can do for now. 
- static { - File libPath = new File("webapp/WEB-INF/lib/jython"); - if (!libPath.exists() && !libPath.canRead()) { - libPath = new File("main/webapp/WEB-INF/lib/jython"); - if (!libPath.exists() && !libPath.canRead()) { - libPath = null; - } - } - - if (libPath != null) { - Properties props = new Properties(); - props.setProperty("python.path", libPath.getAbsolutePath()); - PythonInterpreter.initialize(System.getProperties(), props, new String[] { "" }); - } - - _engine = new PythonInterpreter(); - } - - public JythonEvaluable(String s) { - // indent and create a function out of the code - String[] lines = s.split("\r\n|\r|\n"); - - StringBuffer sb = new StringBuffer(1024); - sb.append("def "); - sb.append(s_functionName); - sb.append("(value, cell, cells, row, rowIndex):"); - for (int i = 0; i < lines.length; i++) { - sb.append("\n "); - sb.append(lines[i]); - } - - _engine.exec(sb.toString()); - } - - public Object evaluate(Properties bindings) { - try { - // call the temporary PyFunction directly - Object result = ((PyFunction)_engine.get(s_functionName)).__call__( - new PyObject[] { - Py.java2py( bindings.get("value") ), - new JythonHasFieldsWrapper((HasFields) bindings.get("cell"), bindings), - new JythonHasFieldsWrapper((HasFields) bindings.get("cells"), bindings), - new JythonHasFieldsWrapper((HasFields) bindings.get("row"), bindings), - Py.java2py( bindings.get("rowIndex") ) - } - ); - - return unwrap(result); - } catch (PyException e) { - return new EvalError(e.getMessage()); - } - } - - protected Object unwrap(Object result) { - if (result != null) { - if (result instanceof JythonObjectWrapper) { - return ((JythonObjectWrapper) result)._obj; - } else if (result instanceof JythonHasFieldsWrapper) { - return ((JythonHasFieldsWrapper) result)._obj; - } else if (result instanceof PyString) { - return ((PyString) result).asString(); - } else if (result instanceof PyObject) { - return unwrap((PyObject) result); - } - } - - return result; - } - - protected 
Object unwrap(PyObject po) { - if (po instanceof PyNone) { - return null; - } else if (po.isNumberType()) { - return po.asDouble(); - } else if (po.isSequenceType()) { - Iterator i = po.asIterable().iterator(); - - List list = new ArrayList(); - while (i.hasNext()) { - list.add(unwrap((Object) i.next())); - } - - return list.toArray(); - } else { - return po; - } - } -} diff --git a/extensions/jython/src/com/google/gridworks/jython/JythonHasFieldsWrapper.java b/extensions/jython/src/com/google/gridworks/jython/JythonHasFieldsWrapper.java deleted file mode 100644 index 610a6b1ce..000000000 --- a/extensions/jython/src/com/google/gridworks/jython/JythonHasFieldsWrapper.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.google.gridworks.jython; - -import java.util.Properties; - -import org.python.core.Py; -import org.python.core.PyObject; - -import com.google.gridworks.expr.HasFields; - -public class JythonHasFieldsWrapper extends PyObject { - private static final long serialVersionUID = -1275353513262385099L; - - public HasFields _obj; - - private Properties _bindings; - - public JythonHasFieldsWrapper(HasFields obj, Properties bindings) { - _obj = obj; - _bindings = bindings; - } - - public PyObject __finditem__(PyObject key) { - String k = (String) key.__tojava__(String.class); - Object v = _obj.getField(k, _bindings); - if (v != null) { - if (v instanceof PyObject) { - return (PyObject) v; - } else if (v instanceof HasFields) { - return new JythonHasFieldsWrapper((HasFields) v, _bindings); - } else if (Py.getAdapter().canAdapt(v)) { - return Py.java2py(v); - } else { - return new JythonObjectWrapper(v); - } - } else { - return null; - } - } - -} diff --git a/extensions/jython/src/com/google/gridworks/jython/JythonObjectWrapper.java b/extensions/jython/src/com/google/gridworks/jython/JythonObjectWrapper.java deleted file mode 100644 index 03576380a..000000000 --- a/extensions/jython/src/com/google/gridworks/jython/JythonObjectWrapper.java +++ /dev/null @@ -1,17 +0,0 
@@ -package com.google.gridworks.jython; - -import org.python.core.PyObject; - -public class JythonObjectWrapper extends PyObject { - private static final long serialVersionUID = -6608115027151667441L; - - public Object _obj; - - public JythonObjectWrapper(Object obj) { - _obj = obj; - } - - public String toString() { - return _obj.getClass().getSimpleName(); - } -} diff --git a/extensions/jython/src/com/google/refine/jython/JythonEvaluable.java b/extensions/jython/src/com/google/refine/jython/JythonEvaluable.java new file mode 100644 index 000000000..9dd016502 --- /dev/null +++ b/extensions/jython/src/com/google/refine/jython/JythonEvaluable.java @@ -0,0 +1,131 @@ +package com.google.refine.jython; + +import java.io.File; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Properties; + +import org.python.core.Py; +import org.python.core.PyException; +import org.python.core.PyFunction; +import org.python.core.PyNone; +import org.python.core.PyObject; +import org.python.core.PyString; +import org.python.util.PythonInterpreter; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.HasFields; +import com.google.refine.expr.LanguageSpecificParser; +import com.google.refine.expr.ParsingException; + +public class JythonEvaluable implements Evaluable { + + static public LanguageSpecificParser createParser() { + return new LanguageSpecificParser() { + + public Evaluable parse(String s) throws ParsingException { + return new JythonEvaluable(s); + } + }; + } + + private static final String s_functionName = "___temp___"; + + private static PythonInterpreter _engine; + + // FIXME(SM): this initialization logic depends on the fact that the JVM's + // current working directory is the root of the Gridworks distributions + // or the development checkouts. 
While this works in practice, it would + // be preferable to have a more reliable address space, but since we + // don't have access to the servlet context from this class this is + // the best we can do for now. + static { + File libPath = new File("webapp/WEB-INF/lib/jython"); + if (!libPath.exists() && !libPath.canRead()) { + libPath = new File("main/webapp/WEB-INF/lib/jython"); + if (!libPath.exists() && !libPath.canRead()) { + libPath = null; + } + } + + if (libPath != null) { + Properties props = new Properties(); + props.setProperty("python.path", libPath.getAbsolutePath()); + PythonInterpreter.initialize(System.getProperties(), props, new String[] { "" }); + } + + _engine = new PythonInterpreter(); + } + + public JythonEvaluable(String s) { + // indent and create a function out of the code + String[] lines = s.split("\r\n|\r|\n"); + + StringBuffer sb = new StringBuffer(1024); + sb.append("def "); + sb.append(s_functionName); + sb.append("(value, cell, cells, row, rowIndex):"); + for (int i = 0; i < lines.length; i++) { + sb.append("\n "); + sb.append(lines[i]); + } + + _engine.exec(sb.toString()); + } + + public Object evaluate(Properties bindings) { + try { + // call the temporary PyFunction directly + Object result = ((PyFunction)_engine.get(s_functionName)).__call__( + new PyObject[] { + Py.java2py( bindings.get("value") ), + new JythonHasFieldsWrapper((HasFields) bindings.get("cell"), bindings), + new JythonHasFieldsWrapper((HasFields) bindings.get("cells"), bindings), + new JythonHasFieldsWrapper((HasFields) bindings.get("row"), bindings), + Py.java2py( bindings.get("rowIndex") ) + } + ); + + return unwrap(result); + } catch (PyException e) { + return new EvalError(e.getMessage()); + } + } + + protected Object unwrap(Object result) { + if (result != null) { + if (result instanceof JythonObjectWrapper) { + return ((JythonObjectWrapper) result)._obj; + } else if (result instanceof JythonHasFieldsWrapper) { + return ((JythonHasFieldsWrapper) result)._obj; 
+ } else if (result instanceof PyString) { + return ((PyString) result).asString(); + } else if (result instanceof PyObject) { + return unwrap((PyObject) result); + } + } + + return result; + } + + protected Object unwrap(PyObject po) { + if (po instanceof PyNone) { + return null; + } else if (po.isNumberType()) { + return po.asDouble(); + } else if (po.isSequenceType()) { + Iterator i = po.asIterable().iterator(); + + List list = new ArrayList(); + while (i.hasNext()) { + list.add(unwrap((Object) i.next())); + } + + return list.toArray(); + } else { + return po; + } + } +} diff --git a/extensions/jython/src/com/google/refine/jython/JythonHasFieldsWrapper.java b/extensions/jython/src/com/google/refine/jython/JythonHasFieldsWrapper.java new file mode 100644 index 000000000..e556ee4b6 --- /dev/null +++ b/extensions/jython/src/com/google/refine/jython/JythonHasFieldsWrapper.java @@ -0,0 +1,40 @@ +package com.google.refine.jython; + +import java.util.Properties; + +import org.python.core.Py; +import org.python.core.PyObject; + +import com.google.refine.expr.HasFields; + +public class JythonHasFieldsWrapper extends PyObject { + private static final long serialVersionUID = -1275353513262385099L; + + public HasFields _obj; + + private Properties _bindings; + + public JythonHasFieldsWrapper(HasFields obj, Properties bindings) { + _obj = obj; + _bindings = bindings; + } + + public PyObject __finditem__(PyObject key) { + String k = (String) key.__tojava__(String.class); + Object v = _obj.getField(k, _bindings); + if (v != null) { + if (v instanceof PyObject) { + return (PyObject) v; + } else if (v instanceof HasFields) { + return new JythonHasFieldsWrapper((HasFields) v, _bindings); + } else if (Py.getAdapter().canAdapt(v)) { + return Py.java2py(v); + } else { + return new JythonObjectWrapper(v); + } + } else { + return null; + } + } + +} diff --git a/extensions/jython/src/com/google/refine/jython/JythonObjectWrapper.java 
b/extensions/jython/src/com/google/refine/jython/JythonObjectWrapper.java new file mode 100644 index 000000000..3b8e38d4d --- /dev/null +++ b/extensions/jython/src/com/google/refine/jython/JythonObjectWrapper.java @@ -0,0 +1,17 @@ +package com.google.refine.jython; + +import org.python.core.PyObject; + +public class JythonObjectWrapper extends PyObject { + private static final long serialVersionUID = -6608115027151667441L; + + public Object _obj; + + public JythonObjectWrapper(Object obj) { + _obj = obj; + } + + public String toString() { + return _obj.getClass().getSimpleName(); + } +} diff --git a/extensions/sample-extension/module/MOD-INF/controller.js b/extensions/sample-extension/module/MOD-INF/controller.js index 80c0a42a6..b317683ac 100644 --- a/extensions/sample-extension/module/MOD-INF/controller.js +++ b/extensions/sample-extension/module/MOD-INF/controller.js @@ -1,6 +1,6 @@ var html = "text/html"; var encoding = "UTF-8"; -var ClientSideResourceManager = Packages.com.google.gridworks.ClientSideResourceManager; +var ClientSideResourceManager = Packages.com.google.refine.ClientSideResourceManager; /* * Function invoked to initialize the extension. 
@@ -39,7 +39,7 @@ function process(path, request, response) { // here's how to pass things into the .vt templates context.someList = ["Superior","Michigan","Huron","Erie","Ontario"]; context.someString = "foo"; - context.someInt = Packages.com.google.gridworks.sampleExtension.SampleUtil.stringArrayLength(context.someList); + context.someInt = Packages.com.google.refine.sampleExtension.SampleUtil.stringArrayLength(context.someList); send(request, response, "index.vt", context); } diff --git a/extensions/sample-extension/src/com/google/gridworks/sampleExtension/SampleUtil.java b/extensions/sample-extension/src/com/google/gridworks/sampleExtension/SampleUtil.java deleted file mode 100644 index 7c500a5a5..000000000 --- a/extensions/sample-extension/src/com/google/gridworks/sampleExtension/SampleUtil.java +++ /dev/null @@ -1,8 +0,0 @@ -package com.google.gridworks.sampleExtension; - - -public class SampleUtil { - static public int stringArrayLength(String[] a) { - return a.length; - } -} diff --git a/extensions/sample-extension/src/com/google/refine/sampleExtension/SampleUtil.java b/extensions/sample-extension/src/com/google/refine/sampleExtension/SampleUtil.java new file mode 100644 index 000000000..af57bca5b --- /dev/null +++ b/extensions/sample-extension/src/com/google/refine/sampleExtension/SampleUtil.java @@ -0,0 +1,8 @@ +package com.google.refine.sampleExtension; + + +public class SampleUtil { + static public int stringArrayLength(String[] a) { + return a.length; + } +} diff --git a/gridworks b/gridworks index b93efe1da..aa133322f 100755 --- a/gridworks +++ b/gridworks @@ -622,7 +622,7 @@ run() { CLASSPATH="$GRIDWORKS_CLASSES_DIR${SEP}$GRIDWORKS_LIB_DIR/*" - RUN_CMD="$JAVA -cp $CLASSPATH $OPTS com.google.gridworks.Gridworks" + RUN_CMD="$JAVA -cp $CLASSPATH $OPTS com.google.refine.Gridworks" #echo "$RUN_CMD" #echo "" @@ -674,7 +674,7 @@ broker_run() { CLASSPATH="$GRIDWORKS_CLASSES_DIR${SEP}$GRIDWORKS_LIB_DIR/*" - RUN_CMD="$JAVA -cp $CLASSPATH $OPTS 
com.google.gridworks.Gridworks" + RUN_CMD="$JAVA -cp $CLASSPATH $OPTS com.google.refine.Gridworks" #echo "$RUN_CMD" #echo "" diff --git a/gridworks.bat b/gridworks.bat index a7673efba..7b7ad6de5 100644 --- a/gridworks.bat +++ b/gridworks.bat @@ -159,7 +159,7 @@ if ""%ACTION%"" == ""run"" goto doRun :doRun set CLASSPATH="%GRIDWORKS_CLASSES_DIR%;%GRIDWORKS_LIB_DIR%\*" -"%JAVA_HOME%\bin\java.exe" -cp %CLASSPATH% %OPTS% -Djava.library.path=%GRIDWORKS_LIB_DIR%/native/windows com.google.gridworks.Gridworks +"%JAVA_HOME%\bin\java.exe" -cp %CLASSPATH% %OPTS% -Djava.library.path=%GRIDWORKS_LIB_DIR%/native/windows com.google.refine.Gridworks goto end :doAnt diff --git a/main/src/com/google/gridworks/ClientSideResourceManager.java b/main/src/com/google/gridworks/ClientSideResourceManager.java deleted file mode 100644 index 5bb82642a..000000000 --- a/main/src/com/google/gridworks/ClientSideResourceManager.java +++ /dev/null @@ -1,91 +0,0 @@ -package com.google.gridworks; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import edu.mit.simile.butterfly.ButterflyModule; -import edu.mit.simile.butterfly.MountPoint; - - -public class ClientSideResourceManager { - final static Logger logger = LoggerFactory.getLogger("gridworks_clientSideResourceManager"); - - static public class QualifiedPath { - public ButterflyModule module; - public String path; - public String fullPath; - } - static public class ClientSideResourceBundle { - final protected Set _pathSet = new HashSet(); - final protected List _pathList = new ArrayList(); - } - - final static protected Map s_bundles - = new HashMap(); - - static public void addPaths( - String bundleName, - ButterflyModule module, - String[] paths) { - - ClientSideResourceBundle bundle = s_bundles.get(bundleName); - if (bundle == null) { - bundle = new ClientSideResourceBundle(); - 
s_bundles.put(bundleName, bundle); - } - - for (String path : paths) { - String fullPath = resolve(module, path); - if (fullPath == null) { - logger.error("Failed to add paths to unmounted module " + module.getName()); - break; - } - if (!bundle._pathSet.contains(fullPath)) { - QualifiedPath qualifiedPath = new QualifiedPath(); - qualifiedPath.module = module; - qualifiedPath.path = path; - qualifiedPath.fullPath = fullPath; - - bundle._pathSet.add(fullPath); - bundle._pathList.add(qualifiedPath); - } - } - } - - static public QualifiedPath[] getPaths(String bundleName) { - ClientSideResourceBundle bundle = s_bundles.get(bundleName); - if (bundle == null) { - return new QualifiedPath[] {}; - } else { - QualifiedPath[] paths = new QualifiedPath[bundle._pathList.size()]; - bundle._pathList.toArray(paths); - return paths; - } - } - - static protected String resolve(ButterflyModule module, String path) { - MountPoint mountPoint = module.getMountPoint(); - if (mountPoint != null) { - String mountPointPath = mountPoint.getMountPoint(); - if (mountPointPath != null) { - StringBuffer sb = new StringBuffer(); - - boolean slashed = path.startsWith("/"); - char[] mountPointChars = mountPointPath.toCharArray(); - - sb.append(mountPointChars, 0, slashed ? 
mountPointChars.length - 1 : mountPointChars.length); - sb.append(path); - - return sb.toString(); - } - } - return null; - } -} diff --git a/main/src/com/google/gridworks/GridworksServlet.java b/main/src/com/google/gridworks/GridworksServlet.java deleted file mode 100644 index 7c30c1e45..000000000 --- a/main/src/com/google/gridworks/GridworksServlet.java +++ /dev/null @@ -1,230 +0,0 @@ -package com.google.gridworks; - -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.Timer; -import java.util.TimerTask; - -import javax.servlet.ServletConfig; -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.io.FileProjectManager; - -import edu.mit.simile.butterfly.Butterfly; -import edu.mit.simile.butterfly.ButterflyModule; - -public class GridworksServlet extends Butterfly { - - static private final String VERSION = "1.5"; - - private static final long serialVersionUID = 2386057901503517403L; - - private static final String JAVAX_SERVLET_CONTEXT_TEMPDIR = "javax.servlet.context.tempdir"; - - static private GridworksServlet s_singleton; - static private File s_dataDir; - - static final private Map commands = new HashMap(); - - // timer for periodically saving projects - static private Timer _timer; - - final static Logger logger = LoggerFactory.getLogger("gridworks"); - - public static String getVersion() { - return VERSION; - } - - final static protected long s_autoSavePeriod = 1000 * 60 * 5; // 5 minutes - - static protected class AutoSaveTimerTask extends TimerTask { - public void run() { - try { - ProjectManager.singleton.save(false); // quick, potentially incomplete save - } finally { - _timer.schedule(new AutoSaveTimerTask(), s_autoSavePeriod); - // we don't use scheduleAtFixedRate 
because that might result in - // bunched up events when the computer is put in sleep mode - } - } - } - - protected ServletConfig config; - - @Override - public void init() throws ServletException { - super.init(); - - s_singleton = this; - - logger.trace("> initialize"); - - String data = getInitParameter("gridworks.data"); - - if (data == null) { - throw new ServletException("can't find servlet init config 'gridworks.data', I have to give up initializing"); - } - - s_dataDir = new File(data); - FileProjectManager.initialize(s_dataDir); - - if (_timer == null) { - _timer = new Timer("autosave"); - _timer.schedule(new AutoSaveTimerTask(), s_autoSavePeriod); - } - - logger.trace("< initialize"); - } - - @Override - public void destroy() { - logger.trace("> destroy"); - - // cancel automatic periodic saving and force a complete save. - if (_timer != null) { - _timer.cancel(); - _timer = null; - } - if (ProjectManager.singleton != null) { - ProjectManager.singleton.dispose(); - ProjectManager.singleton = null; - } - - this.config = null; - - logger.trace("< destroy"); - - super.destroy(); - } - - @Override - public void service(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - if (request.getPathInfo().startsWith("/command/")) { - String commandKey = getCommandKey(request); - Command command = commands.get(commandKey); - if (command != null) { - if (request.getMethod().equals("GET")) { - logger.trace("> GET {}", commandKey); - command.doGet(request, response); - logger.trace("< GET {}", commandKey); - } else if (request.getMethod().equals("POST")) { - logger.trace("> POST {}", commandKey); - command.doPost(request, response); - logger.trace("< POST {}", commandKey); - } else { - response.sendError(405); - } - } else { - response.sendError(404); - } - } else { - super.service(request, response); - } - } - - protected String getCommandKey(HttpServletRequest request) { - // A command path has this format: 
/command/module-name/command-name/... - - String path = request.getPathInfo().substring("/command/".length()); - - int slash1 = path.indexOf('/'); - if (slash1 >= 0) { - int slash2 = path.indexOf('/', slash1 + 1); - if (slash2 > 0) { - path = path.substring(0, slash2); - } - } - - return path; - } - - private File tempDir = null; - - public File getTempDir() { - if (tempDir == null) { - File tempDir = (File) this.config.getServletContext().getAttribute(JAVAX_SERVLET_CONTEXT_TEMPDIR); - if (tempDir == null) { - throw new RuntimeException("This app server doesn't support temp directories"); - } - } - return tempDir; - } - - public File getTempFile(String name) { - return new File(getTempDir(), name); - } - - public File getCacheDir(String name) { - File dir = new File(new File(s_dataDir, "cache"), name); - dir.mkdirs(); - - return dir; - } - - public String getConfiguration(String name, String def) { - return null; - } - - /** - * Register a single command. - * - * @param module the module the command belongs to - * @param name command verb for command - * @param commandObject object implementing the command - * @return true if command was loaded and registered successfully - */ - protected boolean registerOneCommand(ButterflyModule module, String name, Command commandObject) { - return registerOneCommand(module.getName() + "/" + name, commandObject); - } - - /** - * Register a single command. - * - * @param path path for command - * @param commandObject object implementing the command - * @return true if command was loaded and registered successfully - */ - protected boolean registerOneCommand(String path, Command commandObject) { - if (commands.containsKey(path)) { - return false; - } - - commandObject.init(this); - commands.put(path, commandObject); - - return true; - } - - // Currently only for test purposes - protected boolean unregisterCommand(String verb) { - return commands.remove(verb) != null; - } - - /** - * Register a single command. Used by extensions. 
- * - * @param module the module the command belongs to - * @param name command verb for command - * @param commandObject object implementing the command - * - * @return true if command was loaded and registered successfully - */ - static public boolean registerCommand(ButterflyModule module, String commandName, Command commandObject) { - return s_singleton.registerOneCommand(module, commandName, commandObject); - } - - static public Class getClass(String className) throws ClassNotFoundException { - if (className.startsWith("com.metaweb.")) { - className = "com.google." + className.substring("com.metaweb.".length()); - } - return Class.forName(className); - } -} \ No newline at end of file diff --git a/main/src/com/google/gridworks/InterProjectModel.java b/main/src/com/google/gridworks/InterProjectModel.java deleted file mode 100644 index b55291623..000000000 --- a/main/src/com/google/gridworks/InterProjectModel.java +++ /dev/null @@ -1,128 +0,0 @@ -package com.google.gridworks; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.expr.HasFieldsListImpl; -import com.google.gridworks.expr.WrappedRow; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class InterProjectModel { - static public class ProjectJoin { - final public long fromProjectID; - final public String fromProjectColumnName; - final public long toProjectID; - final public String toProjectColumnName; - - final public Map> valueToRowIndices = - new HashMap>(); - - ProjectJoin( - long fromProjectID, - String fromProjectColumnName, - long toProjectID, - String toProjectColumnName - ) { - this.fromProjectID = fromProjectID; - this.fromProjectColumnName = fromProjectColumnName; - this.toProjectID = toProjectID; - this.toProjectColumnName = toProjectColumnName; - 
} - - public HasFieldsListImpl getRows(Object value) { - if (ExpressionUtils.isNonBlankData(value) && valueToRowIndices.containsKey(value)) { - Project toProject = ProjectManager.singleton.getProject(toProjectID); - if (toProject != null) { - HasFieldsListImpl rows = new HasFieldsListImpl(); - for (Integer r : valueToRowIndices.get(value)) { - Row row = toProject.rows.get(r); - rows.add(new WrappedRow(toProject, r, row)); - } - - return rows; - } - } - return null; - } - } - - protected Map _joins = new HashMap(); - - public ProjectJoin getJoin(String fromProject, String fromColumn, String toProject, String toColumn) { - String key = fromProject + ";" + fromColumn + ";" + toProject + ";" + toColumn; - if (!_joins.containsKey(key)) { - ProjectJoin join = new ProjectJoin( - ProjectManager.singleton.getProjectID(fromProject), - fromColumn, - ProjectManager.singleton.getProjectID(toProject), - toColumn - ); - - computeJoin(join); - - _joins.put(key, join); - } - - return _joins.get(key); - } - - public void flushJoinsInvolvingProject(long projectID) { - for (Entry entry : _joins.entrySet()) { - ProjectJoin join = entry.getValue(); - if (join.fromProjectID == projectID || join.toProjectID == projectID) { - _joins.remove(entry.getKey()); - } - } - } - - public void flushJoinsInvolvingProjectColumn(long projectID, String columnName) { - for (Entry entry : _joins.entrySet()) { - ProjectJoin join = entry.getValue(); - if (join.fromProjectID == projectID && join.fromProjectColumnName.equals(columnName) || - join.toProjectID == projectID && join.toProjectColumnName.equals(columnName)) { - _joins.remove(entry.getKey()); - } - } - } - - protected void computeJoin(ProjectJoin join) { - if (join.fromProjectID < 0 || join.toProjectID < 0) { - return; - } - - Project fromProject = ProjectManager.singleton.getProject(join.fromProjectID); - Project toProject = ProjectManager.singleton.getProject(join.toProjectID); - if (fromProject == null || toProject == null) { - return; - } - - 
Column fromColumn = fromProject.columnModel.getColumnByName(join.fromProjectColumnName); - Column toColumn = toProject.columnModel.getColumnByName(join.toProjectColumnName); - if (fromColumn == null || toColumn == null) { - return; - } - - for (Row fromRow : fromProject.rows) { - Object value = fromRow.getCellValue(fromColumn.getCellIndex()); - if (ExpressionUtils.isNonBlankData(value) && !join.valueToRowIndices.containsKey(value)) { - join.valueToRowIndices.put(value, new ArrayList()); - } - } - - int count = toProject.rows.size(); - for (int r = 0; r < count; r++) { - Row toRow = toProject.rows.get(r); - - Object value = toRow.getCellValue(toColumn.getCellIndex()); - if (ExpressionUtils.isNonBlankData(value) && join.valueToRowIndices.containsKey(value)) { - join.valueToRowIndices.get(value).add(r); - } - } - } -} diff --git a/main/src/com/google/gridworks/Jsonizable.java b/main/src/com/google/gridworks/Jsonizable.java deleted file mode 100644 index 3f4d42de9..000000000 --- a/main/src/com/google/gridworks/Jsonizable.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.google.gridworks; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -/** - * Interface for streaming out JSON, either into HTTP responses or - * serialization files. 
- * - * @author dfhuynh - */ -public interface Jsonizable { - public void write(JSONWriter writer, Properties options) throws JSONException; -} diff --git a/main/src/com/google/gridworks/ProjectManager.java b/main/src/com/google/gridworks/ProjectManager.java deleted file mode 100644 index 48842f0c2..000000000 --- a/main/src/com/google/gridworks/ProjectManager.java +++ /dev/null @@ -1,434 +0,0 @@ -package com.google.gridworks; - -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.tools.tar.TarOutputStream; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.history.HistoryEntryManager; -import com.google.gridworks.model.Project; -import com.google.gridworks.preference.PreferenceStore; -import com.google.gridworks.preference.TopList; - -/** - * ProjectManager is responsible for loading and saving the workspace and projects. - * - * - */ -public abstract class ProjectManager { - // last n expressions used across all projects - static protected final int s_expressionHistoryMax = 100; - - protected Map _projectsMetadata; - protected PreferenceStore _preferenceStore; - - final static Logger logger = LoggerFactory.getLogger("project_manager"); - - /** - * What caches the joins between projects. - */ - transient protected InterProjectModel _interProjectModel = new InterProjectModel(); - - /** - * Flags - */ - transient protected int _busy = 0; // heavy operations like creating or importing projects are going on - - /** - * While each project's metadata is loaded completely at start-up, each project's raw data - * is loaded only when the project is accessed by the user. This is because project - * metadata is tiny compared to raw project data. 
This hash map from project ID to project - * is more like a last accessed-last out cache. - */ - transient protected Map _projects; - - static public ProjectManager singleton; - - protected ProjectManager(){ - _projectsMetadata = new HashMap(); - _preferenceStore = new PreferenceStore(); - _projects = new HashMap(); - - preparePreferenceStore(_preferenceStore); - } - - public void dispose() { - save(true); // complete save - - for (Project project : _projects.values()) { - if (project != null) { - project.dispose(); - } - } - - _projects.clear(); - _projectsMetadata.clear(); - } - - /** - * Registers the project in the memory of the current session - * @param project - * @param projectMetadata - */ - public void registerProject(Project project, ProjectMetadata projectMetadata) { - synchronized (this) { - _projects.put(project.id, project); - _projectsMetadata.put(project.id, projectMetadata); - } - } - //----------Load from data store to memory---------------- - - /** - * Load project metadata from data storage - * @param projectID - * @return - */ - public abstract boolean loadProjectMetadata(long projectID); - - /** - * Loads a project from the data store into memory - * @param id - * @return - */ - protected abstract Project loadProject(long id); - - //------------Import and Export from Gridworks archive----------------- - /** - * Import project from a Gridworks archive - * @param projectID - * @param inputStream - * @param gziped - * @throws IOException - */ - public abstract void importProject(long projectID, InputStream inputStream, boolean gziped) throws IOException; - - /** - * Export project to a Gridworks archive - * @param projectId - * @param tos - * @throws IOException - */ - public abstract void exportProject(long projectId, TarOutputStream tos) throws IOException; - - - //------------Save to record store------------ - /** - * Saves a project and its metadata to the data store - * @param id - */ - public void ensureProjectSaved(long id) { - 
synchronized(this){ - ProjectMetadata metadata = this.getProjectMetadata(id); - if (metadata != null) { - try { - saveMetadata(metadata, id); - } catch (Exception e) { - e.printStackTrace(); - } - }//FIXME what should be the behaviour if metadata is null? i.e. not found - - Project project = getProject(id); - if (project != null && metadata != null && metadata.getModified().after(project.getLastSave())) { - try { - saveProject(project); - } catch (Exception e) { - e.printStackTrace(); - } - }//FIXME what should be the behaviour if project is null? i.e. not found or loaded. - //FIXME what should happen if the metadata is found, but not the project? or vice versa? - } - - } - - /** - * Save project metadata to the data store - * @param metadata - * @param projectId - * @throws Exception - */ - protected abstract void saveMetadata(ProjectMetadata metadata, long projectId) throws Exception; - - /** - * Save project to the data store - * @param project - */ - protected abstract void saveProject(Project project); - - /** - * Save workspace and all projects to data store - * @param allModified - */ - public void save(boolean allModified) { - if (allModified || _busy == 0) { - saveProjects(allModified); - saveWorkspace(); - } - } - - /** - * Saves the workspace to the data store - */ - protected abstract void saveWorkspace(); - - /** - * A utility class to prioritize projects for saving, depending on how long ago - * they have been changed but have not been saved. 
- */ - static protected class SaveRecord { - final Project project; - final long overdue; - - SaveRecord(Project project, long overdue) { - this.project = project; - this.overdue = overdue; - } - } - - static protected final int s_projectFlushDelay = 1000 * 60 * 60; // 1 hour - static protected final int s_quickSaveTimeout = 1000 * 30; // 30 secs - - /** - * Saves all projects to the data store - * @param allModified - */ - protected void saveProjects(boolean allModified) { - List records = new ArrayList(); - Date startTimeOfSave = new Date(); - - synchronized (this) { - for (long id : _projectsMetadata.keySet()) { - ProjectMetadata metadata = getProjectMetadata(id); - Project project = _projects.get(id); // don't call getProject() as that will load the project. - - if (project != null) { - boolean hasUnsavedChanges = - metadata.getModified().getTime() > project.getLastSave().getTime(); - - if (hasUnsavedChanges) { - long msecsOverdue = startTimeOfSave.getTime() - project.getLastSave().getTime(); - - records.add(new SaveRecord(project, msecsOverdue)); - - } else if (startTimeOfSave.getTime() - project.getLastSave().getTime() > s_projectFlushDelay) { - /* - * It's been a while since the project was last saved and it hasn't been - * modified. We can safely remove it from the cache to save some memory. - */ - _projects.remove(id).dispose(); - } - } - } - } - - if (records.size() > 0) { - Collections.sort(records, new Comparator() { - public int compare(SaveRecord o1, SaveRecord o2) { - if (o1.overdue < o2.overdue) { - return 1; - } else if (o1.overdue > o2.overdue) { - return -1; - } else { - return 0; - } - } - }); - - logger.info(allModified ? - "Saving all modified projects ..." : - "Saving some modified projects ..." 
- ); - - for (int i = 0; - i < records.size() && - (allModified || (new Date().getTime() - startTimeOfSave.getTime() < s_quickSaveTimeout)); - i++) { - - try { - saveProject(records.get(i).project); - } catch (Exception e) { - e.printStackTrace(); - } - } - } - } - - //--------------Get from memory-------------- - /** - * Gets the InterProjectModel from memory - */ - public InterProjectModel getInterProjectModel() { - return _interProjectModel; - } - - - /** - * Gets the project metadata from memory - * Requires that the metadata has already been loaded from the data store - * @param id - * @return - */ - public ProjectMetadata getProjectMetadata(long id) { - return _projectsMetadata.get(id); - } - - /** - * Gets the project metadata from memory - * Requires that the metadata has already been loaded from the data store - * @param name - * @return - */ - public ProjectMetadata getProjectMetadata(String name) { - for (ProjectMetadata pm : _projectsMetadata.values()) { - if (pm.getName().equals(name)) { - return pm; - } - } - return null; - } - - /** - * Tries to find the project id when given a project name - * Requires that all project metadata exists has been loaded to memory from the data store - * @param name - * The name of the project - * @return - * The id of the project, or -1 if it cannot be found - */ - public long getProjectID(String name) { - for (Entry entry : _projectsMetadata.entrySet()) { - if (entry.getValue().getName().equals(name)) { - return entry.getKey(); - } - } - return -1; - } - - - /** - * Gets all the project Metadata currently held in memory - * @return - */ - public Map getAllProjectMetadata() { - return _projectsMetadata; - } - - /** - * Gets the required project from the data store - * If project does not already exist in memory, it is loaded from the data store - * @param id - * the id of the project - * @return - * the project with the matching id, or null if it can't be found - */ - public Project getProject(long id) { - synchronized 
(this) { - if (_projects.containsKey(id)) { - return _projects.get(id); - } else { - Project project = loadProject(id); - - _projects.put(id, project); - - return project; - } - } - } - - /** - * Gets the preference store - * @return - */ - public PreferenceStore getPreferenceStore() { - return _preferenceStore; - } - - /** - * Gets all expressions from the preference store - * @return - */ - public List getExpressions() { - return ((TopList) _preferenceStore.get("scripting.expressions")).getList(); - } - - /** - * The history entry manager deals with changes - * @return manager for handling history - */ - public abstract HistoryEntryManager getHistoryEntryManager(); - - //-------------remove project----------- - - /** - * Remove the project from the data store - * @param project - */ - public void deleteProject(Project project) { - deleteProject(project.id); - } - - /** - * Remove project from data store - * @param projectID - */ - public abstract void deleteProject(long projectID); - - /** - * Removes project from memory - * @param projectID - */ - protected void removeProject(long projectID){ - if (_projects.containsKey(projectID)) { - _projects.remove(projectID).dispose(); - } - if (_projectsMetadata.containsKey(projectID)) { - _projectsMetadata.remove(projectID); - } - } - - //--------------Miscellaneous----------- - - /** - * Sets the flag for long running operations - * @param busy - */ - public void setBusy(boolean busy) { - synchronized (this) { - if (busy) { - _busy++; - } else { - _busy--; - } - } - } - - - - /** - * Add the latest expression to the preference store - * @param s - */ - public void addLatestExpression(String s) { - synchronized (this) { - ((TopList) _preferenceStore.get("scripting.expressions")).add(s); - } - } - - - /** - * - * @param ps - */ - static protected void preparePreferenceStore(PreferenceStore ps) { - ps.put("scripting.expressions", new TopList(s_expressionHistoryMax)); - } -} diff --git 
a/main/src/com/google/gridworks/ProjectMetadata.java b/main/src/com/google/gridworks/ProjectMetadata.java deleted file mode 100644 index a5a1cfc36..000000000 --- a/main/src/com/google/gridworks/ProjectMetadata.java +++ /dev/null @@ -1,198 +0,0 @@ -package com.google.gridworks; - -import java.io.Serializable; -import java.util.Date; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.preference.PreferenceStore; -import com.google.gridworks.preference.TopList; -import com.google.gridworks.util.JSONUtilities; -import com.google.gridworks.util.ParsingUtilities; - -public class ProjectMetadata implements Jsonizable { - private final Date _created; - private Date _modified; - private String _name; - private String _password; - - private String _encoding; - private int _encodingConfidence; - - private Map _customMetadata = new HashMap(); - private PreferenceStore _preferenceStore = new PreferenceStore(); - - final Logger logger = LoggerFactory.getLogger("project_metadata"); - - protected ProjectMetadata(Date date) { - _created = date; - preparePreferenceStore(_preferenceStore); - } - - public ProjectMetadata() { - _created = new Date(); - _modified = _created; - preparePreferenceStore(_preferenceStore); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("name"); writer.value(_name); - writer.key("created"); writer.value(ParsingUtilities.dateToString(_created)); - writer.key("modified"); writer.value(ParsingUtilities.dateToString(_modified)); - - if ("save".equals(options.getProperty("mode"))) { - writer.key("password"); writer.value(_password); - - writer.key("encoding"); writer.value(_encoding); - writer.key("encodingConfidence"); writer.value(_encodingConfidence); 
- - writer.key("customMetadata"); writer.object(); - for (String key : _customMetadata.keySet()) { - Serializable value = _customMetadata.get(key); - writer.key(key); - writer.value(value); - } - writer.endObject(); - - writer.key("preferences"); _preferenceStore.write(writer, options); - } - writer.endObject(); - } - - public void write(JSONWriter jsonWriter) throws Exception { - Properties options = new Properties(); - options.setProperty("mode", "save"); - - write(jsonWriter, options); - } - - static public ProjectMetadata loadFromJSON(JSONObject obj) { - ProjectMetadata pm = new ProjectMetadata(JSONUtilities.getDate(obj, "modified", new Date())); - - pm._modified = JSONUtilities.getDate(obj, "modified", new Date()); - pm._name = JSONUtilities.getString(obj, "name", ""); - pm._password = JSONUtilities.getString(obj, "password", ""); - - pm._encoding = JSONUtilities.getString(obj, "encoding", ""); - pm._encodingConfidence = JSONUtilities.getInt(obj, "encodingConfidence", 0); - - if (obj.has("preferences") && !obj.isNull("preferences")) { - try { - pm._preferenceStore.load(obj.getJSONObject("preferences")); - } catch (JSONException e) { - // ignore - } - } - - if (obj.has("expressions") && !obj.isNull("expressions")) { // backward compatibility - try { - ((TopList) pm._preferenceStore.get("scripting.expressions")) - .load(obj.getJSONArray("expressions")); - } catch (JSONException e) { - // ignore - } - } - - if (obj.has("customMetadata") && !obj.isNull("customMetadata")) { - try { - JSONObject obj2 = obj.getJSONObject("customMetadata"); - - @SuppressWarnings("unchecked") - Iterator keys = obj2.keys(); - while (keys.hasNext()) { - String key = keys.next(); - Object value = obj2.get(key); - if (value != null && value instanceof Serializable) { - pm._customMetadata.put(key, (Serializable) value); - } - } - } catch (JSONException e) { - // ignore - } - } - - return pm; - } - - static protected void preparePreferenceStore(PreferenceStore ps) { - 
ProjectManager.preparePreferenceStore(ps); - // Any project specific preferences? - } - - public Date getCreated() { - return _created; - } - - public void setName(String name) { - this._name = name; - } - - public String getName() { - return _name; - } - - public void setEncoding(String encoding) { - this._encoding = encoding; - } - - public String getEncoding() { - return _encoding; - } - - public void setEncodingConfidence(int confidence) { - this._encodingConfidence = confidence; - } - - public void setEncodingConfidence(String confidence) { - if (confidence != null) { - this.setEncodingConfidence(Integer.parseInt(confidence)); - } - } - - public int getEncodingConfidence() { - return _encodingConfidence; - } - - public void setPassword(String password) { - this._password = password; - } - - public String getPassword() { - return _password; - } - - public Date getModified() { - return _modified; - } - - public void updateModified() { - _modified = new Date(); - } - - public PreferenceStore getPreferenceStore() { - return _preferenceStore; - } - - public Serializable getCustomMetadata(String key) { - return _customMetadata.get(key); - } - - public void setCustomMetadata(String key, Serializable value) { - if (value == null) { - _customMetadata.remove(key); - } else { - _customMetadata.put(key, value); - } - } -} diff --git a/main/src/com/google/gridworks/browsing/DecoratedValue.java b/main/src/com/google/gridworks/browsing/DecoratedValue.java deleted file mode 100644 index ec24fd0a6..000000000 --- a/main/src/com/google/gridworks/browsing/DecoratedValue.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.google.gridworks.browsing; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; - -/** - * Store a value and its text label, in case the value is not a string itself. - * For instance, if a value is a date, then its label can be one particular - * rendering of that date. 
- * - * Facet choices that are presented to the user as text are stored as decorated values. - */ -public class DecoratedValue implements Jsonizable { - final public Object value; - final public String label; - - public DecoratedValue(Object value, String label) { - this.value = value; - this.label = label; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - writer.object(); - writer.key("v"); writer.value(value); - writer.key("l"); writer.value(label); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/browsing/Engine.java b/main/src/com/google/gridworks/browsing/Engine.java deleted file mode 100644 index df28aa93e..000000000 --- a/main/src/com/google/gridworks/browsing/Engine.java +++ /dev/null @@ -1,214 +0,0 @@ -package com.google.gridworks.browsing; - -import java.util.LinkedList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.browsing.facets.Facet; -import com.google.gridworks.browsing.facets.ListFacet; -import com.google.gridworks.browsing.facets.RangeFacet; -import com.google.gridworks.browsing.facets.ScatterplotFacet; -import com.google.gridworks.browsing.facets.TextSearchFacet; -import com.google.gridworks.browsing.facets.TimeRangeFacet; -import com.google.gridworks.browsing.util.ConjunctiveFilteredRecords; -import com.google.gridworks.browsing.util.ConjunctiveFilteredRows; -import com.google.gridworks.browsing.util.FilteredRecordsAsFilteredRows; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -/** - * Faceted browsing engine. 
- */ -public class Engine implements Jsonizable { - static public enum Mode { - RowBased, - RecordBased - } - - public final static String INCLUDE_DEPENDENT = "includeDependent"; - public final static String MODE = "mode"; - public final static String MODE_ROW_BASED = "row-based"; - public final static String MODE_RECORD_BASED = "record-based"; - - protected Project _project; - protected List _facets = new LinkedList(); - protected Mode _mode = Mode.RowBased; - - static public String modeToString(Mode mode) { - return mode == Mode.RowBased ? MODE_ROW_BASED : MODE_RECORD_BASED; - } - static public Mode stringToMode(String s) { - return MODE_ROW_BASED.equals(s) ? Mode.RowBased : Mode.RecordBased; - } - - public Engine(Project project) { - _project = project; - } - - public Mode getMode() { - return _mode; - } - public void setMode(Mode mode) { - _mode = mode; - } - - public FilteredRows getAllRows() { - return new FilteredRows() { - @Override - public void accept(Project project, RowVisitor visitor) { - try { - visitor.start(project); - - int c = project.rows.size(); - for (int rowIndex = 0; rowIndex < c; rowIndex++) { - Row row = project.rows.get(rowIndex); - visitor.visit(project, rowIndex, row); - } - } finally { - visitor.end(project); - } - } - }; - } - - public FilteredRows getAllFilteredRows() { - return getFilteredRows(null); - } - - public FilteredRows getFilteredRows(Facet except) { - if (_mode == Mode.RecordBased) { - return new FilteredRecordsAsFilteredRows(getFilteredRecords(except)); - } else if (_mode == Mode.RowBased) { - ConjunctiveFilteredRows cfr = new ConjunctiveFilteredRows(); - for (Facet facet : _facets) { - if (facet != except) { - RowFilter rowFilter = facet.getRowFilter(_project); - if (rowFilter != null) { - cfr.add(rowFilter); - } - } - } - return cfr; - } - throw new InternalError("Unknown mode."); - } - - public FilteredRecords getAllRecords() { - return new FilteredRecords() { - @Override - public void accept(Project project, 
RecordVisitor visitor) { - try { - visitor.start(project); - - int c = project.recordModel.getRecordCount(); - for (int r = 0; r < c; r++) { - visitor.visit(project, project.recordModel.getRecord(r)); - } - } finally { - visitor.end(project); - } - } - }; - } - - public FilteredRecords getFilteredRecords() { - return getFilteredRecords(null); - } - - public FilteredRecords getFilteredRecords(Facet except) { - if (_mode == Mode.RecordBased) { - ConjunctiveFilteredRecords cfr = new ConjunctiveFilteredRecords(); - for (Facet facet : _facets) { - if (facet != except) { - RecordFilter recordFilter = facet.getRecordFilter(_project); - if (recordFilter != null) { - cfr.add(recordFilter); - } - } - } - return cfr; - } - throw new InternalError("This method should not be called when the engine is not in record mode."); - } - - public void initializeFromJSON(JSONObject o) throws Exception { - if (o == null) { - return; - } - - if (o.has("facets") && !o.isNull("facets")) { - JSONArray a = o.getJSONArray("facets"); - int length = a.length(); - - for (int i = 0; i < length; i++) { - JSONObject fo = a.getJSONObject(i); - String type = fo.has("type") ? fo.getString("type") : "list"; - - Facet facet = null; - if ("list".equals(type)) { - facet = new ListFacet(); - } else if ("range".equals(type)) { - facet = new RangeFacet(); - } else if ("timerange".equals(type)) { - facet = new TimeRangeFacet(); - } else if ("scatterplot".equals(type)) { - facet = new ScatterplotFacet(); - } else if ("text".equals(type)) { - facet = new TextSearchFacet(); - } - - if (facet != null) { - facet.initializeFromJSON(_project, fo); - _facets.add(facet); - } - } - } - - // for backward compatibility - if (o.has(INCLUDE_DEPENDENT) && !o.isNull(INCLUDE_DEPENDENT)) { - _mode = o.getBoolean(INCLUDE_DEPENDENT) ? Mode.RecordBased : Mode.RowBased; - } - - if (o.has(MODE) && !o.isNull(MODE)) { - _mode = MODE_ROW_BASED.equals(o.getString(MODE)) ? 
Mode.RowBased : Mode.RecordBased; - } - } - - public void computeFacets() throws JSONException { - if (_mode == Mode.RowBased) { - for (Facet facet : _facets) { - FilteredRows filteredRows = getFilteredRows(facet); - - facet.computeChoices(_project, filteredRows); - } - } else if (_mode == Mode.RecordBased) { - for (Facet facet : _facets) { - FilteredRecords filteredRecords = getFilteredRecords(facet); - - facet.computeChoices(_project, filteredRecords); - } - } else { - throw new InternalError("Unknown mode."); - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("facets"); - writer.array(); - for (Facet facet : _facets) { - facet.write(writer, options); - } - writer.endArray(); - writer.key(MODE); writer.value(_mode == Mode.RowBased ? MODE_ROW_BASED : MODE_RECORD_BASED); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/browsing/FilteredRecords.java b/main/src/com/google/gridworks/browsing/FilteredRecords.java deleted file mode 100644 index 87d9629a2..000000000 --- a/main/src/com/google/gridworks/browsing/FilteredRecords.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.google.gridworks.browsing; - -import com.google.gridworks.model.Project; - -/** - * Interface for anything that can decide which records match and which don't - * based on some particular criteria. 
- */ -public interface FilteredRecords { - /** - * Go through the records of the given project, determine which match and which don't, - * and call visitor.visit() on those that match - * - * @param project - * @param visitor - */ - public void accept(Project project, RecordVisitor visitor); -} diff --git a/main/src/com/google/gridworks/browsing/FilteredRows.java b/main/src/com/google/gridworks/browsing/FilteredRows.java deleted file mode 100644 index dcb7426a1..000000000 --- a/main/src/com/google/gridworks/browsing/FilteredRows.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.google.gridworks.browsing; - -import com.google.gridworks.model.Project; - -/** - * Interface for anything that can decide which rows match and which rows don't match - * based on some particular criteria. - */ -public interface FilteredRows { - /** - * Go through the rows of the given project, determine which match and which don't, - * and call visitor.visit() on those that match, and possibly their context and - * dependent rows. - * - * @param project - * @param visitor - */ - public void accept(Project project, RowVisitor visitor); -} diff --git a/main/src/com/google/gridworks/browsing/RecordFilter.java b/main/src/com/google/gridworks/browsing/RecordFilter.java deleted file mode 100644 index 057b8b024..000000000 --- a/main/src/com/google/gridworks/browsing/RecordFilter.java +++ /dev/null @@ -1,12 +0,0 @@ -package com.google.gridworks.browsing; - -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; - -/** - * Interface for judging if a particular record matches or doesn't match some - * particular criterion, such as a facet constraint. 
- */ -public interface RecordFilter { - public boolean filterRecord(Project project, Record record); -} diff --git a/main/src/com/google/gridworks/browsing/RecordVisitor.java b/main/src/com/google/gridworks/browsing/RecordVisitor.java deleted file mode 100644 index 8b318e2d7..000000000 --- a/main/src/com/google/gridworks/browsing/RecordVisitor.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.google.gridworks.browsing; - -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; - -/** - * Interface for visiting records one by one. The records visited are only those that match some - * particular criteria, such as facets' constraints. - */ -public interface RecordVisitor { - public void start(Project project); // called before any visit() call - - public boolean visit( - Project project, - Record record - ); - - public void end(Project project); // called after all visit() calls -} diff --git a/main/src/com/google/gridworks/browsing/RowFilter.java b/main/src/com/google/gridworks/browsing/RowFilter.java deleted file mode 100644 index 72c7f2aba..000000000 --- a/main/src/com/google/gridworks/browsing/RowFilter.java +++ /dev/null @@ -1,12 +0,0 @@ -package com.google.gridworks.browsing; - -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -/** - * Interface for judging if a particular row matches or doesn't match some - * particular criterion, such as a facet constraint. 
- */ -public interface RowFilter { - public boolean filterRow(Project project, int rowIndex, Row row); -} diff --git a/main/src/com/google/gridworks/browsing/RowVisitor.java b/main/src/com/google/gridworks/browsing/RowVisitor.java deleted file mode 100644 index c1916be78..000000000 --- a/main/src/com/google/gridworks/browsing/RowVisitor.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.google.gridworks.browsing; - -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -/** - * Interface for visiting rows one by one. The rows visited are only those that match some - * particular criteria, such as facets' constraints. - */ -public interface RowVisitor { - public void start(Project project); // called before any visit() call - - public boolean visit( - Project project, - int rowIndex, // zero-based row index - Row row - ); - - public void end(Project project); // called after all visit() calls -} diff --git a/main/src/com/google/gridworks/browsing/facets/Facet.java b/main/src/com/google/gridworks/browsing/facets/Facet.java deleted file mode 100644 index 15114f646..000000000 --- a/main/src/com/google/gridworks/browsing/facets/Facet.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.google.gridworks.browsing.facets; - -import org.json.JSONObject; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.browsing.FilteredRecords; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RecordFilter; -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.model.Project; - -/** - * Interface of facets. 
- */ -public interface Facet extends Jsonizable { - public RowFilter getRowFilter(Project project); - - public RecordFilter getRecordFilter(Project project); - - public void computeChoices(Project project, FilteredRows filteredRows); - - public void computeChoices(Project project, FilteredRecords filteredRecords); - - public void initializeFromJSON(Project project, JSONObject o) throws Exception; -} diff --git a/main/src/com/google/gridworks/browsing/facets/ListFacet.java b/main/src/com/google/gridworks/browsing/facets/ListFacet.java deleted file mode 100644 index e698f81b3..000000000 --- a/main/src/com/google/gridworks/browsing/facets/ListFacet.java +++ /dev/null @@ -1,255 +0,0 @@ -package com.google.gridworks.browsing.facets; - -import java.util.LinkedList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.browsing.DecoratedValue; -import com.google.gridworks.browsing.FilteredRecords; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RecordFilter; -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.browsing.filters.AllRowsRecordFilter; -import com.google.gridworks.browsing.filters.AnyRowRecordFilter; -import com.google.gridworks.browsing.filters.ExpressionEqualRowFilter; -import com.google.gridworks.browsing.util.ExpressionNominalValueGrouper; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.ParsingException; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.JSONUtilities; - -public class ListFacet implements Facet { - /* - * Configuration - */ - protected String _name; - protected String _expression; - protected String _columnName; - protected boolean 
_invert; - - // If true, then facet won't show the blank and error choices - protected boolean _omitBlank; - protected boolean _omitError; - - protected List _selection = new LinkedList(); - protected boolean _selectBlank; - protected boolean _selectError; - - /* - * Derived configuration - */ - protected int _cellIndex; - protected Evaluable _eval; - protected String _errorMessage; - - /* - * Computed results - */ - protected List _choices = new LinkedList(); - protected int _blankCount; - protected int _errorCount; - - public ListFacet() { - } - - @Override - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("name"); writer.value(_name); - writer.key("expression"); writer.value(_expression); - writer.key("columnName"); writer.value(_columnName); - writer.key("invert"); writer.value(_invert); - - if (_errorMessage != null) { - writer.key("error"); writer.value(_errorMessage); - } else if (_choices.size() > getLimit()) { - writer.key("error"); writer.value("Too many choices"); - } else { - writer.key("choices"); writer.array(); - for (NominalFacetChoice choice : _choices) { - choice.write(writer, options); - } - writer.endArray(); - - if (!_omitBlank && (_selectBlank || _blankCount > 0)) { - writer.key("blankChoice"); - writer.object(); - writer.key("s"); writer.value(_selectBlank); - writer.key("c"); writer.value(_blankCount); - writer.endObject(); - } - if (!_omitError && (_selectError || _errorCount > 0)) { - writer.key("errorChoice"); - writer.object(); - writer.key("s"); writer.value(_selectError); - writer.key("c"); writer.value(_errorCount); - writer.endObject(); - } - } - - writer.endObject(); - } - - protected int getLimit() { - Object v = ProjectManager.singleton.getPreferenceStore().get("ui.browsing.listFacet.limit"); - if (v != null) { - if (v instanceof Number) { - return ((Number) v).intValue(); - } else { - try { - int n = Integer.parseInt(v.toString()); - return n; - } catch 
(NumberFormatException e) { - // ignore - } - } - } - return 2000; - } - - @Override - public void initializeFromJSON(Project project, JSONObject o) throws Exception { - _name = o.getString("name"); - _expression = o.getString("expression"); - _columnName = o.getString("columnName"); - _invert = o.has("invert") && o.getBoolean("invert"); - - if (_columnName.length() > 0) { - Column column = project.columnModel.getColumnByName(_columnName); - if (column != null) { - _cellIndex = column.getCellIndex(); - } else { - _errorMessage = "No column named " + _columnName; - } - } else { - _cellIndex = -1; - } - - try { - _eval = MetaParser.parse(_expression); - } catch (ParsingException e) { - _errorMessage = e.getMessage(); - } - - _selection.clear(); - - JSONArray a = o.getJSONArray("selection"); - int length = a.length(); - - for (int i = 0; i < length; i++) { - JSONObject oc = a.getJSONObject(i); - JSONObject ocv = oc.getJSONObject("v"); - - DecoratedValue decoratedValue = new DecoratedValue( - ocv.get("v"), ocv.getString("l")); - - NominalFacetChoice nominalFacetChoice = new NominalFacetChoice(decoratedValue); - nominalFacetChoice.selected = true; - - _selection.add(nominalFacetChoice); - } - - _omitBlank = JSONUtilities.getBoolean(o, "omitBlank", false); - _omitError = JSONUtilities.getBoolean(o, "omitError", false); - - _selectBlank = JSONUtilities.getBoolean(o, "selectBlank", false); - _selectError = JSONUtilities.getBoolean(o, "selectError", false); - } - - @Override - public RowFilter getRowFilter(Project project) { - return - _eval == null || - _errorMessage != null || - (_selection.size() == 0 && !_selectBlank && !_selectError) ? - null : - new ExpressionEqualRowFilter( - _eval, - _columnName, - _cellIndex, - createMatches(), - _selectBlank, - _selectError, - _invert); - } - - @Override - public RecordFilter getRecordFilter(Project project) { - RowFilter rowFilter = getRowFilter(project); - return rowFilter == null ? null : - (_invert ? 
- new AllRowsRecordFilter(rowFilter) : - new AnyRowRecordFilter(rowFilter)); - } - - @Override - public void computeChoices(Project project, FilteredRows filteredRows) { - if (_eval != null && _errorMessage == null) { - ExpressionNominalValueGrouper grouper = - new ExpressionNominalValueGrouper(_eval, _columnName, _cellIndex); - - filteredRows.accept(project, grouper); - - postProcessGrouper(grouper); - } - } - - @Override - public void computeChoices(Project project, FilteredRecords filteredRecords) { - if (_eval != null && _errorMessage == null) { - ExpressionNominalValueGrouper grouper = - new ExpressionNominalValueGrouper(_eval, _columnName, _cellIndex); - - filteredRecords.accept(project, grouper); - - postProcessGrouper(grouper); - } - } - - protected void postProcessGrouper(ExpressionNominalValueGrouper grouper) { - _choices.clear(); - _choices.addAll(grouper.choices.values()); - - for (NominalFacetChoice choice : _selection) { - String valueString = choice.decoratedValue.value.toString(); - - if (grouper.choices.containsKey(valueString)) { - grouper.choices.get(valueString).selected = true; - } else { - /* - * A selected choice can have zero count if it is selected together - * with other choices, and some other facets' constraints eliminate - * all rows projected to this choice altogether. For example, if you - * select both "car" and "bicycle" in the "type of vehicle" facet, and - * then constrain the "wheels" facet to more than 2, then the "bicycle" - * choice now has zero count even if it's still selected. The grouper - * won't be able to detect the "bicycle" choice, so we need to inject - * that choice into the choice list ourselves. 
- */ - choice.count = 0; - _choices.add(choice); - } - } - - _blankCount = grouper.blankCount; - _errorCount = grouper.errorCount; - } - - protected Object[] createMatches() { - Object[] a = new Object[_selection.size()]; - for (int i = 0; i < a.length; i++) { - a[i] = _selection.get(i).decoratedValue.value; - } - return a; - } -} diff --git a/main/src/com/google/gridworks/browsing/facets/NominalFacetChoice.java b/main/src/com/google/gridworks/browsing/facets/NominalFacetChoice.java deleted file mode 100644 index 25bdab7ff..000000000 --- a/main/src/com/google/gridworks/browsing/facets/NominalFacetChoice.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.google.gridworks.browsing.facets; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.browsing.DecoratedValue; - -/** - * Store a facet choice that has a decorated value, a count of matched rows, - * and a flag of whether it has been selected. 
- */ -public class NominalFacetChoice implements Jsonizable { - final public DecoratedValue decoratedValue; - public int count; - public boolean selected; - - public NominalFacetChoice(DecoratedValue decoratedValue) { - this.decoratedValue = decoratedValue; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - writer.object(); - writer.key("v"); decoratedValue.write(writer, options); - writer.key("c"); writer.value(count); - writer.key("s"); writer.value(selected); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/browsing/facets/RangeFacet.java b/main/src/com/google/gridworks/browsing/facets/RangeFacet.java deleted file mode 100644 index b1dd31c2f..000000000 --- a/main/src/com/google/gridworks/browsing/facets/RangeFacet.java +++ /dev/null @@ -1,259 +0,0 @@ -package com.google.gridworks.browsing.facets; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.FilteredRecords; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RecordFilter; -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.browsing.filters.AnyRowRecordFilter; -import com.google.gridworks.browsing.filters.ExpressionNumberComparisonRowFilter; -import com.google.gridworks.browsing.util.ExpressionBasedRowEvaluable; -import com.google.gridworks.browsing.util.ExpressionNumericValueBinner; -import com.google.gridworks.browsing.util.NumericBinIndex; -import com.google.gridworks.browsing.util.NumericBinRecordIndex; -import com.google.gridworks.browsing.util.NumericBinRowIndex; -import com.google.gridworks.browsing.util.RowEvaluable; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.ParsingException; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import 
com.google.gridworks.util.JSONUtilities; - -public class RangeFacet implements Facet { - /* - * Configuration, from the client side - */ - protected String _name; // name of facet - protected String _expression; // expression to compute numeric value(s) per row - protected String _columnName; // column to base expression on, if any - - protected double _from; // the numeric selection - protected double _to; - - protected boolean _selectNumeric; // whether the numeric selection applies, default true - protected boolean _selectNonNumeric; - protected boolean _selectBlank; - protected boolean _selectError; - - /* - * Derived configuration data - */ - protected int _cellIndex; - protected Evaluable _eval; - protected String _errorMessage; - protected boolean _selected; // false if we're certain that all rows will match - // and there isn't any filtering to do - - /* - * Computed data, to return to the client side - */ - protected double _min; - protected double _max; - protected double _step; - protected int[] _baseBins; - protected int[] _bins; - - protected int _baseNumericCount; - protected int _baseNonNumericCount; - protected int _baseBlankCount; - protected int _baseErrorCount; - - protected int _numericCount; - protected int _nonNumericCount; - protected int _blankCount; - protected int _errorCount; - - public RangeFacet() { - } - - protected static final String MIN = "min"; - protected static final String MAX = "max"; - protected static final String TO = "to"; - protected static final String FROM = "from"; - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("name"); writer.value(_name); - writer.key("expression"); writer.value(_expression); - writer.key("columnName"); writer.value(_columnName); - - if (_errorMessage != null) { - writer.key("error"); writer.value(_errorMessage); - } else { - if (!Double.isInfinite(_min) && !Double.isInfinite(_max)) { - writer.key(MIN); writer.value(_min); - 
writer.key(MAX); writer.value(_max); - writer.key("step"); writer.value(_step); - - writer.key("bins"); writer.array(); - for (int b : _bins) { - writer.value(b); - } - writer.endArray(); - - writer.key("baseBins"); writer.array(); - for (int b : _baseBins) { - writer.value(b); - } - writer.endArray(); - - writer.key(FROM); writer.value(_from); - writer.key(TO); writer.value(_to); - } - - writer.key("baseNumericCount"); writer.value(_baseNumericCount); - writer.key("baseNonNumericCount"); writer.value(_baseNonNumericCount); - writer.key("baseBlankCount"); writer.value(_baseBlankCount); - writer.key("baseErrorCount"); writer.value(_baseErrorCount); - - writer.key("numericCount"); writer.value(_numericCount); - writer.key("nonNumericCount"); writer.value(_nonNumericCount); - writer.key("blankCount"); writer.value(_blankCount); - writer.key("errorCount"); writer.value(_errorCount); - } - writer.endObject(); - } - - public void initializeFromJSON(Project project, JSONObject o) throws Exception { - _name = o.getString("name"); - _expression = o.getString("expression"); - _columnName = o.getString("columnName"); - - if (_columnName.length() > 0) { - Column column = project.columnModel.getColumnByName(_columnName); - if (column != null) { - _cellIndex = column.getCellIndex(); - } else { - _errorMessage = "No column named " + _columnName; - } - } else { - _cellIndex = -1; - } - - try { - _eval = MetaParser.parse(_expression); - } catch (ParsingException e) { - _errorMessage = e.getMessage(); - } - - if (o.has(FROM) || o.has(TO)) { - _from = o.has(FROM) ? o.getDouble(FROM) : _min; - _to = o.has(TO) ? 
o.getDouble(TO) : _max; - _selected = true; - } - - _selectNumeric = JSONUtilities.getBoolean(o, "selectNumeric", true); - _selectNonNumeric = JSONUtilities.getBoolean(o, "selectNonNumeric", true); - _selectBlank = JSONUtilities.getBoolean(o, "selectBlank", true); - _selectError = JSONUtilities.getBoolean(o, "selectError", true); - - if (!_selectNumeric || !_selectNonNumeric || !_selectBlank || !_selectError) { - _selected = true; - } - } - - public RowFilter getRowFilter(Project project) { - if (_eval != null && _errorMessage == null && _selected) { - return new ExpressionNumberComparisonRowFilter( - getRowEvaluable(project), _selectNumeric, _selectNonNumeric, _selectBlank, _selectError) { - - protected boolean checkValue(double d) { - return d >= _from && d < _to; - }; - }; - } else { - return null; - } - } - - @Override - public RecordFilter getRecordFilter(Project project) { - RowFilter rowFilter = getRowFilter(project); - return rowFilter == null ? null : new AnyRowRecordFilter(rowFilter); - } - - public void computeChoices(Project project, FilteredRows filteredRows) { - if (_eval != null && _errorMessage == null) { - RowEvaluable rowEvaluable = getRowEvaluable(project); - - Column column = project.columnModel.getColumnByCellIndex(_cellIndex); - String key = "numeric-bin:row-based:" + _expression; - NumericBinIndex index = (NumericBinIndex) column.getPrecompute(key); - if (index == null) { - index = new NumericBinRowIndex(project, rowEvaluable); - column.setPrecompute(key, index); - } - - retrieveDataFromBaseBinIndex(index); - - ExpressionNumericValueBinner binner = - new ExpressionNumericValueBinner(rowEvaluable, index); - - filteredRows.accept(project, binner); - retrieveDataFromBinner(binner); - } - } - - public void computeChoices(Project project, FilteredRecords filteredRecords) { - if (_eval != null && _errorMessage == null) { - RowEvaluable rowEvaluable = getRowEvaluable(project); - - Column column = project.columnModel.getColumnByCellIndex(_cellIndex); 
- String key = "numeric-bin:record-based:" + _expression; - NumericBinIndex index = (NumericBinIndex) column.getPrecompute(key); - if (index == null) { - index = new NumericBinRecordIndex(project, rowEvaluable); - column.setPrecompute(key, index); - } - - retrieveDataFromBaseBinIndex(index); - - ExpressionNumericValueBinner binner = - new ExpressionNumericValueBinner(rowEvaluable, index); - - filteredRecords.accept(project, binner); - - retrieveDataFromBinner(binner); - } - } - - protected RowEvaluable getRowEvaluable(Project project) { - return new ExpressionBasedRowEvaluable(_columnName, _cellIndex, _eval); - } - - protected void retrieveDataFromBaseBinIndex(NumericBinIndex index) { - _min = index.getMin(); - _max = index.getMax(); - _step = index.getStep(); - _baseBins = index.getBins(); - - _baseNumericCount = index.getNumericRowCount(); - _baseNonNumericCount = index.getNonNumericRowCount(); - _baseBlankCount = index.getBlankRowCount(); - _baseErrorCount = index.getErrorRowCount(); - - if (_selected) { - _from = Math.max(_from, _min); - _to = Math.min(_to, _max); - } else { - _from = _min; - _to = _max; - } - } - - protected void retrieveDataFromBinner(ExpressionNumericValueBinner binner) { - _bins = binner.bins; - _numericCount = binner.numericCount; - _nonNumericCount = binner.nonNumericCount; - _blankCount = binner.blankCount; - _errorCount = binner.errorCount; - } -} diff --git a/main/src/com/google/gridworks/browsing/facets/ScatterplotDrawingRowVisitor.java b/main/src/com/google/gridworks/browsing/facets/ScatterplotDrawingRowVisitor.java deleted file mode 100644 index 9b8a6418c..000000000 --- a/main/src/com/google/gridworks/browsing/facets/ScatterplotDrawingRowVisitor.java +++ /dev/null @@ -1,133 +0,0 @@ -package com.google.gridworks.browsing.facets; - -import java.awt.BasicStroke; -import java.awt.Color; -import java.awt.Graphics2D; -import java.awt.RenderingHints; -import java.awt.geom.AffineTransform; -import java.awt.geom.Point2D; -import 
java.awt.geom.Rectangle2D; -import java.awt.image.BufferedImage; -import java.awt.image.RenderedImage; - -import com.google.gridworks.browsing.RecordVisitor; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.model.Row; - -public class ScatterplotDrawingRowVisitor implements RowVisitor, RecordVisitor { - - int col_x; - int col_y; - int dim_x; - int dim_y; - int rotation; - - double l; - double dot; - - double min_x; - double max_x; - double min_y; - double max_y; - - BufferedImage image; - Graphics2D g2; - - AffineTransform r; - - public ScatterplotDrawingRowVisitor( - int col_x, int col_y, double min_x, double max_x, double min_y, double max_y, - int size, int dim_x, int dim_y, int rotation, double dot, Color color) - { - this.col_x = col_x; - this.col_y = col_y; - this.min_x = min_x; - this.min_y = min_y; - this.max_x = max_x; - this.max_y = max_y; - this.dot = dot; - this.dim_x = dim_x; - this.dim_y = dim_y; - this.rotation = rotation; - - l = (double) size; - r = ScatterplotFacet.createRotationMatrix(rotation, l); - - image = new BufferedImage(size, size, BufferedImage.TYPE_4BYTE_ABGR); - g2 = (Graphics2D) image.getGraphics(); - g2.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); - g2.setStroke(new BasicStroke(1.0f)); - - AffineTransform t = AffineTransform.getTranslateInstance(0, l); - t.scale(1, -1); - - g2.setTransform(t); - g2.setColor(color); - g2.setPaint(color); - - if (r != null) { - /* - * Fill in the negative quadrants to give a hint of how the plot has been rotated. 
- */ - Graphics2D g2r = (Graphics2D) g2.create(); - g2r.transform(r); - - g2r.setPaint(Color.lightGray); - g2r.fillRect(-size, 0, size, size); - g2r.fillRect(0, -size, size, size); - g2r.dispose(); - } - } - - public void setColor(Color color) { - g2.setColor(color); - g2.setPaint(color); - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - Cell cellx = row.getCell(col_x); - Cell celly = row.getCell(col_y); - if ((cellx != null && cellx.value != null && cellx.value instanceof Number) && - (celly != null && celly.value != null && celly.value instanceof Number)) - { - double xv = ((Number) cellx.value).doubleValue(); - double yv = ((Number) celly.value).doubleValue(); - - Point2D.Double p = new Point2D.Double(xv,yv); - - p = ScatterplotFacet.translateCoordinates( - p, min_x, max_x, min_y, max_y, dim_x, dim_y, l, r); - - g2.fill(new Rectangle2D.Double(p.x - dot / 2, p.y - dot / 2, dot, dot)); - } - - return false; - } - - @Override - public boolean visit(Project project, Record record) { - for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { - visit(project, r, project.rows.get(r)); - } - return false; - } - - public RenderedImage getImage() { - return image; - } -} - diff --git a/main/src/com/google/gridworks/browsing/facets/ScatterplotFacet.java b/main/src/com/google/gridworks/browsing/facets/ScatterplotFacet.java deleted file mode 100644 index caa79f02d..000000000 --- a/main/src/com/google/gridworks/browsing/facets/ScatterplotFacet.java +++ /dev/null @@ -1,452 +0,0 @@ -package com.google.gridworks.browsing.facets; - -import java.awt.Color; -import java.awt.geom.AffineTransform; -import java.awt.geom.Point2D; -import java.awt.image.BufferedImage; -import java.awt.image.RenderedImage; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import 
java.util.Properties; - -import javax.imageio.ImageIO; - -import org.apache.commons.codec.binary.Base64; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.browsing.FilteredRecords; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RecordFilter; -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.browsing.filters.AnyRowRecordFilter; -import com.google.gridworks.browsing.filters.DualExpressionsNumberComparisonRowFilter; -import com.google.gridworks.browsing.util.ExpressionBasedRowEvaluable; -import com.google.gridworks.browsing.util.NumericBinIndex; -import com.google.gridworks.browsing.util.NumericBinRecordIndex; -import com.google.gridworks.browsing.util.NumericBinRowIndex; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.ParsingException; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; - -public class ScatterplotFacet implements Facet { - - public static final int LIN = 0; - public static final int LOG = 1; - - public static final int NO_ROTATION = 0; - public static final int ROTATE_CW = 1; - public static final int ROTATE_CCW = 2; - - /* - * Configuration, from the client side - */ - protected String name; // name of facet - - protected String expression_x; // expression to compute the x numeric value(s) per row - protected String expression_y; // expression to compute the y numeric value(s) per row - protected String columnName_x; // column to base the x expression on, if any - protected String columnName_y; // column to base the y expression on, if any - - protected int size; - protected int dim_x; - protected int dim_y; - protected int rotation; - - protected double l; - protected double dot; - - protected String image; - - protected String color_str; - 
protected Color color; - - protected double from_x; // the numeric selection for the x axis, from 0 to 1 - protected double to_x; - protected double from_y; // the numeric selection for the y axis, from 0 to 1 - protected double to_y; - - /* - * Derived configuration data - */ - protected int columnIndex_x; - protected int columnIndex_y; - protected Evaluable eval_x; - protected Evaluable eval_y; - protected String errorMessage_x; - protected String errorMessage_y; - - protected double min_x; - protected double max_x; - protected double min_y; - protected double max_y; - protected AffineTransform t; - - protected boolean selected; // false if we're certain that all rows will match - // and there isn't any filtering to do - - public static final String NAME = "name"; - public static final String IMAGE = "image"; - public static final String COLOR = "color"; - public static final String BASE_COLOR = "base_color"; - public static final String SIZE = "l"; - public static final String ROTATION = "r"; - public static final String DOT = "dot"; - public static final String DIM_X = "dim_x"; - public static final String DIM_Y = "dim_y"; - - public static final String X_COLUMN_NAME = "cx"; - public static final String X_EXPRESSION = "ex"; - public static final String MIN_X = "min_x"; - public static final String MAX_X = "max_x"; - public static final String TO_X = "to_x"; - public static final String FROM_X = "from_x"; - public static final String ERROR_X = "error_x"; - - public static final String Y_COLUMN_NAME = "cy"; - public static final String Y_EXPRESSION = "ey"; - public static final String MIN_Y = "min_y"; - public static final String MAX_Y = "max_y"; - public static final String TO_Y = "to_y"; - public static final String FROM_Y = "from_y"; - public static final String ERROR_Y = "error_y"; - - private static final boolean IMAGE_URI = false; - - public static String EMPTY_IMAGE; - - final static Logger logger = LoggerFactory.getLogger("scatterplot_facet"); - - static 
{ - try { - EMPTY_IMAGE = serializeImage(new BufferedImage(1, 1, BufferedImage.TYPE_4BYTE_ABGR)); - } catch (IOException e) { - EMPTY_IMAGE = ""; - } - } - - public void write(JSONWriter writer, Properties options) throws JSONException { - - writer.object(); - - writer.key(NAME); writer.value(name); - writer.key(X_COLUMN_NAME); writer.value(columnName_x); - writer.key(X_EXPRESSION); writer.value(expression_x); - writer.key(Y_COLUMN_NAME); writer.value(columnName_y); - writer.key(Y_EXPRESSION); writer.value(expression_y); - writer.key(SIZE); writer.value(size); - writer.key(DOT); writer.value(dot); - writer.key(ROTATION); writer.value(rotation); - writer.key(DIM_X); writer.value(dim_x); - writer.key(DIM_Y); writer.value(dim_y); - writer.key(COLOR); writer.value(color_str); - - if (IMAGE_URI) { - writer.key(IMAGE); writer.value(image); - } - - if (errorMessage_x != null) { - writer.key(ERROR_X); writer.value(errorMessage_x); - } else { - if (!Double.isInfinite(min_x) && !Double.isInfinite(max_x)) { - writer.key(FROM_X); writer.value(from_x); - writer.key(TO_X); writer.value(to_x); - } - } - - if (errorMessage_y != null) { - writer.key(ERROR_Y); writer.value(errorMessage_y); - } else { - if (!Double.isInfinite(min_y) && !Double.isInfinite(max_y)) { - writer.key(FROM_Y); writer.value(from_y); - writer.key(TO_Y); writer.value(to_y); - } - } - - writer.endObject(); - } - - public void initializeFromJSON(Project project, JSONObject o) throws Exception { - name = o.getString(NAME); - l = size = (o.has(SIZE)) ? o.getInt(SIZE) : 100; - dot = (o.has(DOT)) ? o.getInt(DOT) : 0.5d; - - dim_x = (o.has(DIM_X)) ? getAxisDim(o.getString(DIM_X)) : LIN; - if (o.has(FROM_X) && o.has(TO_X)) { - from_x = o.getDouble(FROM_X); - to_x = o.getDouble(TO_X); - selected = true; - } else { - from_x = 0; - to_x = 1; - } - - dim_y = (o.has(DIM_Y)) ? 
getAxisDim(o.getString(DIM_Y)) : LIN; - if (o.has(FROM_Y) && o.has(TO_Y)) { - from_y = o.getDouble(FROM_Y); - to_y = o.getDouble(TO_Y); - selected = true; - } else { - from_y = 0; - to_y = 1; - } - - rotation = (o.has(ROTATION)) ? getRotation(o.getString(ROTATION)) : NO_ROTATION; - t = createRotationMatrix(rotation, l); - - color_str = (o.has(COLOR)) ? o.getString(COLOR) : "000000"; - color = new Color(Integer.parseInt(color_str,16)); - - columnName_x = o.getString(X_COLUMN_NAME); - expression_x = o.getString(X_EXPRESSION); - - if (columnName_x.length() > 0) { - Column x_column = project.columnModel.getColumnByName(columnName_x); - if (x_column != null) { - columnIndex_x = x_column.getCellIndex(); - - NumericBinIndex index_x = ScatterplotFacet.getBinIndex(project, x_column, eval_x, expression_x); - min_x = index_x.getMin(); - max_x = index_x.getMax(); - } else { - errorMessage_x = "No column named " + columnName_x; - } - } else { - columnIndex_x = -1; - } - - try { - eval_x = MetaParser.parse(expression_x); - } catch (ParsingException e) { - errorMessage_x = e.getMessage(); - } - - columnName_y = o.getString(Y_COLUMN_NAME); - expression_y = o.getString(Y_EXPRESSION); - - if (columnName_y.length() > 0) { - Column y_column = project.columnModel.getColumnByName(columnName_y); - if (y_column != null) { - columnIndex_y = y_column.getCellIndex(); - - NumericBinIndex index_y = ScatterplotFacet.getBinIndex(project, y_column, eval_y, expression_y); - min_y = index_y.getMin(); - max_y = index_y.getMax(); - } else { - errorMessage_y = "No column named " + columnName_y; - } - } else { - columnIndex_y = -1; - } - - try { - eval_y = MetaParser.parse(expression_y); - } catch (ParsingException e) { - errorMessage_y = e.getMessage(); - } - - } - - public RowFilter getRowFilter(Project project) { - if (selected && - eval_x != null && errorMessage_x == null && - eval_y != null && errorMessage_y == null) - { - return new DualExpressionsNumberComparisonRowFilter( - eval_x, 
columnName_x, columnIndex_x, eval_y, columnName_y, columnIndex_y) { - - double from_x_pixels = from_x * l; - double to_x_pixels = to_x * l; - double from_y_pixels = from_y * l; - double to_y_pixels = to_y * l; - - protected boolean checkValues(double x, double y) { - Point2D.Double p = new Point2D.Double(x,y); - p = translateCoordinates(p, min_x, max_x, min_y, max_y, dim_x, dim_y, l, t); - return p.x >= from_x_pixels && p.x <= to_x_pixels && p.y >= from_y_pixels && p.y <= to_y_pixels; - }; - }; - } else { - return null; - } - } - - @Override - public RecordFilter getRecordFilter(Project project) { - RowFilter rowFilter = getRowFilter(project); - return rowFilter == null ? null : new AnyRowRecordFilter(rowFilter); - } - - public void computeChoices(Project project, FilteredRows filteredRows) { - if (eval_x != null && eval_y != null && errorMessage_x == null && errorMessage_y == null) { - Column column_x = project.columnModel.getColumnByCellIndex(columnIndex_x); - NumericBinIndex index_x = getBinIndex(project, column_x, eval_x, expression_x, "row-based"); - - Column column_y = project.columnModel.getColumnByCellIndex(columnIndex_y); - NumericBinIndex index_y = getBinIndex(project, column_y, eval_y, expression_y, "row-based"); - - retrieveDataFromBinIndices(index_x, index_y); - - if (IMAGE_URI) { - if (index_x.isNumeric() && index_y.isNumeric()) { - ScatterplotDrawingRowVisitor drawer = new ScatterplotDrawingRowVisitor( - columnIndex_x, columnIndex_y, min_x, max_x, min_y, max_y, - size, dim_x, dim_y, rotation, dot, color - ); - filteredRows.accept(project, drawer); - - try { - image = serializeImage(drawer.getImage()); - } catch (IOException e) { - logger.warn("Exception caught while generating the image", e); - } - } else { - image = EMPTY_IMAGE; - } - } - } - } - - public void computeChoices(Project project, FilteredRecords filteredRecords) { - if (eval_x != null && eval_y != null && errorMessage_x == null && errorMessage_y == null) { - Column column_x = 
project.columnModel.getColumnByCellIndex(columnIndex_x); - NumericBinIndex index_x = getBinIndex(project, column_x, eval_x, expression_x, "record-based"); - - Column column_y = project.columnModel.getColumnByCellIndex(columnIndex_y); - NumericBinIndex index_y = getBinIndex(project, column_y, eval_y, expression_y, "record-based"); - - retrieveDataFromBinIndices(index_x, index_y); - - if (IMAGE_URI) { - if (index_x.isNumeric() && index_y.isNumeric()) { - ScatterplotDrawingRowVisitor drawer = new ScatterplotDrawingRowVisitor( - columnIndex_x, columnIndex_y, min_x, max_x, min_y, max_y, - size, dim_x, dim_y, rotation, dot, color - ); - filteredRecords.accept(project, drawer); - - try { - image = serializeImage(drawer.getImage()); - } catch (IOException e) { - logger.warn("Exception caught while generating the image", e); - } - } else { - image = EMPTY_IMAGE; - } - } - } - } - - protected void retrieveDataFromBinIndices(NumericBinIndex index_x, NumericBinIndex index_y) { - min_x = index_x.getMin(); - max_x = index_x.getMax(); - - min_y = index_y.getMin(); - max_y = index_y.getMax(); - } - - public static String serializeImage(RenderedImage image) throws IOException { - ByteArrayOutputStream output = new ByteArrayOutputStream(4096); - ImageIO.write(image, "png", output); - output.close(); - String encoded = Base64.encodeBase64String(output.toByteArray()); - String url = "data:image/png;base64," + encoded; - return url; - } - - public static int getAxisDim(String type) { - return ("log".equals(type.toLowerCase())) ? 
LOG : LIN; - } - - public static int getRotation(String rotation) { - rotation = rotation.toLowerCase(); - if ("cw".equals(rotation) || "right".equals(rotation)) { - return ScatterplotFacet.ROTATE_CW; - } else if ("ccw".equals(rotation) || "left".equals(rotation)) { - return ScatterplotFacet.ROTATE_CCW; - } else { - return NO_ROTATION; - } - } - - public static NumericBinIndex getBinIndex(Project project, Column column, Evaluable eval, String expression) { - return getBinIndex(project, column, eval, expression, "row-based"); - } - - public static NumericBinIndex getBinIndex(Project project, Column column, Evaluable eval, String expression, String mode) { - String key = "numeric-bin:" + mode + ":" + expression; - if (eval == null) { - try { - eval = MetaParser.parse(expression); - } catch (ParsingException e) { - logger.warn("Error parsing expression",e); - } - } - NumericBinIndex index = (NumericBinIndex) column.getPrecompute(key); - if (index == null) { - index = "row-based".equals(mode) ? 
- new NumericBinRowIndex(project, new ExpressionBasedRowEvaluable(column.getName(), column.getCellIndex(), eval)) : - new NumericBinRecordIndex(project, new ExpressionBasedRowEvaluable(column.getName(), column.getCellIndex(), eval)); - - column.setPrecompute(key, index); - } - return index; - } - - private static double s_rotateScale = 1 / Math.sqrt(2.0); - - public static AffineTransform createRotationMatrix(int rotation, double l) { - if (rotation == ScatterplotFacet.ROTATE_CW) { - AffineTransform t = AffineTransform.getTranslateInstance(0, l / 2); - t.scale(s_rotateScale, s_rotateScale); - t.rotate(-Math.PI / 4); - return t; - } else if (rotation == ScatterplotFacet.ROTATE_CCW) { - AffineTransform t = AffineTransform.getTranslateInstance(l / 2, 0); - t.scale(s_rotateScale, s_rotateScale); - t.rotate(Math.PI / 4); - return t; - } else { - return null; - } - } - - public static Point2D.Double translateCoordinates( - Point2D.Double p, - double min_x, double max_x, double min_y, double max_y, - int dim_x, int dim_y, double l, AffineTransform t) { - - double x = p.x; - double y = p.y; - - double relative_x = x - min_x; - double range_x = max_x - min_x; - if (dim_x == ScatterplotFacet.LOG) { - x = Math.log10(relative_x + 1) * l / Math.log10(range_x + 1); - } else { - x = relative_x * l / range_x; - } - - double relative_y = y - min_y; - double range_y = max_y - min_y; - if (dim_y == ScatterplotFacet.LOG) { - y = Math.log10(relative_y + 1) * l / Math.log10(range_y + 1); - } else { - y = relative_y * l / range_y; - } - - p.x = x; - p.y = y; - if (t != null) { - t.transform(p, p); - } - - return p; - } - -} diff --git a/main/src/com/google/gridworks/browsing/facets/TextSearchFacet.java b/main/src/com/google/gridworks/browsing/facets/TextSearchFacet.java deleted file mode 100644 index b511fdfb8..000000000 --- a/main/src/com/google/gridworks/browsing/facets/TextSearchFacet.java +++ /dev/null @@ -1,120 +0,0 @@ -package com.google.gridworks.browsing.facets; - -import 
java.util.Properties; -import java.util.regex.Pattern; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.FilteredRecords; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RecordFilter; -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.browsing.filters.AnyRowRecordFilter; -import com.google.gridworks.browsing.filters.ExpressionStringComparisonRowFilter; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.gel.ast.VariableExpr; -import com.google.gridworks.model.Project; - -public class TextSearchFacet implements Facet { - /* - * Configuration - */ - protected String _name; - protected String _columnName; - protected String _query; - protected String _mode; - protected boolean _caseSensitive; - - /* - * Derived configuration - */ - protected int _cellIndex; - protected Pattern _pattern; - - public TextSearchFacet() { - } - - @Override - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("name"); writer.value(_name); - writer.key("columnName"); writer.value(_columnName); - writer.key("query"); writer.value(_query); - writer.key("mode"); writer.value(_mode); - writer.key("caseSensitive"); writer.value(_caseSensitive); - writer.endObject(); - } - - @Override - public void initializeFromJSON(Project project, JSONObject o) throws Exception { - _name = o.getString("name"); - _columnName = o.getString("columnName"); - - _cellIndex = project.columnModel.getColumnByName(_columnName).getCellIndex(); - - if (!o.isNull("query")) { - _query = o.getString("query"); - } - - _mode = o.getString("mode"); - _caseSensitive = o.getBoolean("caseSensitive"); - if (_query != null) { - if ("regex".equals(_mode)) { - try { - _pattern = Pattern.compile( - _query, - _caseSensitive ? 
0 : Pattern.CASE_INSENSITIVE); - } catch (java.util.regex.PatternSyntaxException e) { - e.printStackTrace(); - } - } else if (!_caseSensitive) { - _query = _query.toLowerCase(); - } - } - } - - @Override - public RowFilter getRowFilter(Project project) { - if (_query == null || _query.length() == 0) { - return null; - } else if ("regex".equals(_mode) && _pattern == null) { - return null; - } - - Evaluable eval = new VariableExpr("value"); - - if ("regex".equals(_mode)) { - return new ExpressionStringComparisonRowFilter(eval, _columnName, _cellIndex) { - protected boolean checkValue(String s) { - return _pattern.matcher(s).find(); - }; - }; - } else { - return new ExpressionStringComparisonRowFilter(eval, _columnName, _cellIndex) { - protected boolean checkValue(String s) { - return (_caseSensitive ? s : s.toLowerCase()).contains(_query); - }; - }; - } - } - - @Override - public RecordFilter getRecordFilter(Project project) { - RowFilter rowFilter = getRowFilter(project); - return rowFilter == null ? 
null : new AnyRowRecordFilter(rowFilter); - } - - @Override - public void computeChoices(Project project, FilteredRows filteredRows) { - // nothing to do - } - - @Override - public void computeChoices(Project project, FilteredRecords filteredRecords) { - // nothing to do - } -} diff --git a/main/src/com/google/gridworks/browsing/facets/TimeRangeFacet.java b/main/src/com/google/gridworks/browsing/facets/TimeRangeFacet.java deleted file mode 100644 index 4ff350007..000000000 --- a/main/src/com/google/gridworks/browsing/facets/TimeRangeFacet.java +++ /dev/null @@ -1,201 +0,0 @@ -package com.google.gridworks.browsing.facets; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.FilteredRecords; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.browsing.filters.ExpressionTimeComparisonRowFilter; -import com.google.gridworks.browsing.util.ExpressionTimeValueBinner; -import com.google.gridworks.browsing.util.RowEvaluable; -import com.google.gridworks.browsing.util.TimeBinIndex; -import com.google.gridworks.browsing.util.TimeBinRecordIndex; -import com.google.gridworks.browsing.util.TimeBinRowIndex; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.ParsingException; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.JSONUtilities; - -public class TimeRangeFacet extends RangeFacet { - - protected boolean _selectTime; // whether the time selection applies, default true - protected boolean _selectNonTime; - - protected int _baseTimeCount; - protected int _baseNonTimeCount; - - protected int _timeCount; - protected int _nonTimeCount; - - public void write(JSONWriter writer, Properties options) throws JSONException { - - writer.object(); - writer.key("name"); writer.value(_name); - 
writer.key("expression"); writer.value(_expression); - writer.key("columnName"); writer.value(_columnName); - - if (_errorMessage != null) { - writer.key("error"); writer.value(_errorMessage); - } else { - if (!Double.isInfinite(_min) && !Double.isInfinite(_max)) { - writer.key(MIN); writer.value(_min); - writer.key(MAX); writer.value(_max); - writer.key("step"); writer.value(_step); - - writer.key("bins"); writer.array(); - for (int b : _bins) { - writer.value(b); - } - writer.endArray(); - - writer.key("baseBins"); writer.array(); - for (int b : _baseBins) { - writer.value(b); - } - writer.endArray(); - - writer.key(FROM); writer.value(_from); - writer.key(TO); writer.value(_to); - } - - writer.key("baseTimeCount"); writer.value(_baseTimeCount); - writer.key("baseNonTimeCount"); writer.value(_baseNonTimeCount); - writer.key("baseBlankCount"); writer.value(_baseBlankCount); - writer.key("baseErrorCount"); writer.value(_baseErrorCount); - - writer.key("timeCount"); writer.value(_timeCount); - writer.key("nonTimeCount"); writer.value(_nonTimeCount); - writer.key("blankCount"); writer.value(_blankCount); - writer.key("errorCount"); writer.value(_errorCount); - } - writer.endObject(); - } - - public void initializeFromJSON(Project project, JSONObject o) throws Exception { - _name = o.getString("name"); - _expression = o.getString("expression"); - _columnName = o.getString("columnName"); - - if (_columnName.length() > 0) { - Column column = project.columnModel.getColumnByName(_columnName); - if (column != null) { - _cellIndex = column.getCellIndex(); - } else { - _errorMessage = "No column named " + _columnName; - } - } else { - _cellIndex = -1; - } - - try { - _eval = MetaParser.parse(_expression); - } catch (ParsingException e) { - _errorMessage = e.getMessage(); - } - - if (o.has(FROM) || o.has(TO)) { - _from = o.has(FROM) ? o.getDouble(FROM) : _min; - _to = o.has(TO) ? 
o.getDouble(TO) : _max; - _selected = true; - } - - _selectTime = JSONUtilities.getBoolean(o, "selectTime", true); - _selectNonTime = JSONUtilities.getBoolean(o, "selectNonTime", true); - _selectBlank = JSONUtilities.getBoolean(o, "selectBlank", true); - _selectError = JSONUtilities.getBoolean(o, "selectError", true); - - if (!_selectTime || !_selectNonTime || !_selectBlank || !_selectError) { - _selected = true; - } - } - - public RowFilter getRowFilter(Project project) { - if (_eval != null && _errorMessage == null && _selected) { - return new ExpressionTimeComparisonRowFilter( - getRowEvaluable(project), _selectTime, _selectNonTime, _selectBlank, _selectError) { - - protected boolean checkValue(long t) { - return t >= _from && t < _to; - }; - }; - } else { - return null; - } - } - - public void computeChoices(Project project, FilteredRows filteredRows) { - if (_eval != null && _errorMessage == null) { - RowEvaluable rowEvaluable = getRowEvaluable(project); - - Column column = project.columnModel.getColumnByCellIndex(_cellIndex); - String key = "time-bin:row-based:" + _expression; - TimeBinIndex index = (TimeBinIndex) column.getPrecompute(key); - if (index == null) { - index = new TimeBinRowIndex(project, rowEvaluable); - column.setPrecompute(key, index); - } - - retrieveDataFromBaseBinIndex(index); - - ExpressionTimeValueBinner binner = new ExpressionTimeValueBinner(rowEvaluable, index); - - filteredRows.accept(project, binner); - retrieveDataFromBinner(binner); - } - } - - public void computeChoices(Project project, FilteredRecords filteredRecords) { - if (_eval != null && _errorMessage == null) { - RowEvaluable rowEvaluable = getRowEvaluable(project); - - Column column = project.columnModel.getColumnByCellIndex(_cellIndex); - String key = "time-bin:record-based:" + _expression; - TimeBinIndex index = (TimeBinIndex) column.getPrecompute(key); - if (index == null) { - index = new TimeBinRecordIndex(project, rowEvaluable); - column.setPrecompute(key, index); - } 
- - retrieveDataFromBaseBinIndex(index); - - ExpressionTimeValueBinner binner = new ExpressionTimeValueBinner(rowEvaluable, index); - - filteredRecords.accept(project, binner); - - retrieveDataFromBinner(binner); - } - } - - protected void retrieveDataFromBaseBinIndex(TimeBinIndex index) { - _min = index.getMin(); - _max = index.getMax(); - _step = index.getStep(); - _baseBins = index.getBins(); - - _baseTimeCount = index.getTimeRowCount(); - _baseNonTimeCount = index.getNonTimeRowCount(); - _baseBlankCount = index.getBlankRowCount(); - _baseErrorCount = index.getErrorRowCount(); - - if (_selected) { - _from = Math.max(_from, _min); - _to = Math.min(_to, _max); - } else { - _from = _min; - _to = _max; - } - } - - protected void retrieveDataFromBinner(ExpressionTimeValueBinner binner) { - _bins = binner.bins; - _timeCount = binner.timeCount; - _nonTimeCount = binner.nonTimeCount; - _blankCount = binner.blankCount; - _errorCount = binner.errorCount; - } -} diff --git a/main/src/com/google/gridworks/browsing/filters/AllRowsRecordFilter.java b/main/src/com/google/gridworks/browsing/filters/AllRowsRecordFilter.java deleted file mode 100644 index fbf522e25..000000000 --- a/main/src/com/google/gridworks/browsing/filters/AllRowsRecordFilter.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.google.gridworks.browsing.filters; - -import com.google.gridworks.browsing.RecordFilter; -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; - -public class AllRowsRecordFilter implements RecordFilter { - final protected RowFilter _rowFilter; - - public AllRowsRecordFilter(RowFilter rowFilter) { - _rowFilter = rowFilter; - } - - @Override - public boolean filterRecord(Project project, Record record) { - for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { - if (!_rowFilter.filterRow(project, r, project.rows.get(r))) { - return false; - } - } - return true; - } -} diff --git 
a/main/src/com/google/gridworks/browsing/filters/AnyRowRecordFilter.java b/main/src/com/google/gridworks/browsing/filters/AnyRowRecordFilter.java deleted file mode 100644 index d3469ed4e..000000000 --- a/main/src/com/google/gridworks/browsing/filters/AnyRowRecordFilter.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.google.gridworks.browsing.filters; - -import com.google.gridworks.browsing.RecordFilter; -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; - -public class AnyRowRecordFilter implements RecordFilter { - final protected RowFilter _rowFilter; - - public AnyRowRecordFilter(RowFilter rowFilter) { - _rowFilter = rowFilter; - } - - @Override - public boolean filterRecord(Project project, Record record) { - for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { - if (_rowFilter.filterRow(project, r, project.rows.get(r))) { - return true; - } - } - return false; - } -} diff --git a/main/src/com/google/gridworks/browsing/filters/DualExpressionsNumberComparisonRowFilter.java b/main/src/com/google/gridworks/browsing/filters/DualExpressionsNumberComparisonRowFilter.java deleted file mode 100644 index a5480ab87..000000000 --- a/main/src/com/google/gridworks/browsing/filters/DualExpressionsNumberComparisonRowFilter.java +++ /dev/null @@ -1,85 +0,0 @@ -package com.google.gridworks.browsing.filters; - -import java.util.Collection; -import java.util.Properties; - -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -/** - * Judge if a row matches by evaluating two given expressions on the row, based on two different columns - * and checking the results. It's a match if the result satisfies some numeric comparisons. 
- */ -abstract public class DualExpressionsNumberComparisonRowFilter implements RowFilter { - - final protected Evaluable _x_evaluable; - final protected String _x_columnName; - final protected int _x_cellIndex; - final protected Evaluable _y_evaluable; - final protected String _y_columnName; - final protected int _y_cellIndex; - - public DualExpressionsNumberComparisonRowFilter ( - Evaluable x_evaluable, - String x_columnName, - int x_cellIndex, - Evaluable y_evaluable, - String y_columnName, - int y_cellIndex - ) { - _x_evaluable = x_evaluable; - _x_columnName = x_columnName; - _x_cellIndex = x_cellIndex; - _y_evaluable = y_evaluable; - _y_columnName = y_columnName; - _y_cellIndex = y_cellIndex; - } - - public boolean filterRow(Project project, int rowIndex, Row row) { - Cell x_cell = _x_cellIndex < 0 ? null : row.getCell(_x_cellIndex); - Properties x_bindings = ExpressionUtils.createBindings(project); - ExpressionUtils.bind(x_bindings, row, rowIndex, _x_columnName, x_cell); - Object x_value = _x_evaluable.evaluate(x_bindings); - - Cell y_cell = _y_cellIndex < 0 ? 
null : row.getCell(_y_cellIndex); - Properties y_bindings = ExpressionUtils.createBindings(project); - ExpressionUtils.bind(y_bindings, row, rowIndex, _y_columnName, y_cell); - Object y_value = _y_evaluable.evaluate(y_bindings); - - if (x_value != null && y_value != null) { - if (x_value.getClass().isArray() || y_value.getClass().isArray()) { - return false; - } else if (x_value instanceof Collection || y_value instanceof Collection) { - return false; - } // else, fall through - } - - return checkValue(x_value,y_value); - } - - protected boolean checkValue(Object vx, Object vy) { - if (ExpressionUtils.isError(vx) || ExpressionUtils.isError(vy)) { - return false; - } else if (ExpressionUtils.isNonBlankData(vx) && ExpressionUtils.isNonBlankData(vy)) { - if (vx instanceof Number && vy instanceof Number) { - double dx = ((Number) vx).doubleValue(); - double dy = ((Number) vy).doubleValue(); - return (!Double.isInfinite(dx) && - !Double.isNaN(dx) && - !Double.isInfinite(dy) && - !Double.isNaN(dy) && - checkValues(dx,dy)); - } else { - return false; - } - } else { - return false; - } - } - - abstract protected boolean checkValues(double dx, double dy); -} diff --git a/main/src/com/google/gridworks/browsing/filters/ExpressionEqualRowFilter.java b/main/src/com/google/gridworks/browsing/filters/ExpressionEqualRowFilter.java deleted file mode 100644 index 3a52b5cf5..000000000 --- a/main/src/com/google/gridworks/browsing/filters/ExpressionEqualRowFilter.java +++ /dev/null @@ -1,164 +0,0 @@ -package com.google.gridworks.browsing.filters; - -import java.util.Collection; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; - -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -/** - * Judge if a row matches by evaluating a 
given expression on the row, based on a particular - * column, and checking the result. It's a match if the result is any one of a given list of - * values, or if the result is blank or error and we want blank or error values. - */ -public class ExpressionEqualRowFilter implements RowFilter { - final protected Evaluable _evaluable; // the expression to evaluate - - final protected String _columnName; - final protected int _cellIndex; // the expression is based on this column; - // -1 if based on no column in particular, - // for expression such as "row.starred". - - final protected Object[] _matches; - final protected boolean _selectBlank; - final protected boolean _selectError; - final protected boolean _invert; - - public ExpressionEqualRowFilter( - Evaluable evaluable, - String columnName, - int cellIndex, - Object[] matches, - boolean selectBlank, - boolean selectError, - boolean invert - ) { - _evaluable = evaluable; - _columnName = columnName; - _cellIndex = cellIndex; - _matches = matches; - _selectBlank = selectBlank; - _selectError = selectError; - _invert = invert; - } - - public boolean filterRow(Project project, int rowIndex, Row row) { - return _invert ? - internalInvertedFilterRow(project, rowIndex, row) : - internalFilterRow(project, rowIndex, row); - } - - public boolean internalFilterRow(Project project, int rowIndex, Row row) { - Cell cell = _cellIndex < 0 ? 
null : row.getCell(_cellIndex); - - Properties bindings = ExpressionUtils.createBindings(project); - ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); - - Object value = _evaluable.evaluate(bindings); - if (value != null) { - if (value.getClass().isArray()) { - Object[] a = (Object[]) value; - for (Object v : a) { - if (testValue(v)) { - return true; - } - } - return false; - } else if (value instanceof Collection) { - for (Object v : ExpressionUtils.toObjectCollection(value)) { - if (testValue(v)) { - return true; - } - } - return false; - } else if (value instanceof JSONArray) { - JSONArray a = (JSONArray) value; - int l = a.length(); - - for (int i = 0; i < l; i++) { - try { - if (testValue(a.get(i))) { - return true; - } - } catch (JSONException e) { - // ignore - } - } - return false; - } // else, fall through - } - - return testValue(value); - } - - public boolean internalInvertedFilterRow(Project project, int rowIndex, Row row) { - Cell cell = _cellIndex < 0 ? null : row.getCell(_cellIndex); - - Properties bindings = ExpressionUtils.createBindings(project); - ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); - - Object value = _evaluable.evaluate(bindings); - if (value != null) { - if (value.getClass().isArray()) { - Object[] a = (Object[]) value; - for (Object v : a) { - if (testValue(v)) { - return false; - } - } - return true; - } else if (value instanceof Collection) { - for (Object v : ExpressionUtils.toObjectCollection(value)) { - if (testValue(v)) { - return false; - } - } - return true; - } else if (value instanceof JSONArray) { - JSONArray a = (JSONArray) value; - int l = a.length(); - - for (int i = 0; i < l; i++) { - try { - if (testValue(a.get(i))) { - return false; - } - } catch (JSONException e) { - // ignore - } - } - return true; - } // else, fall through - } - - return !testValue(value); - } - - protected boolean testValue(Object v) { - if (ExpressionUtils.isError(v)) { - return _selectError; - } else if 
(ExpressionUtils.isNonBlankData(v)) { - for (Object match : _matches) { - if (testValue(v, match)) { - return true; - } - } - return false; - } else { - return _selectBlank; - } - } - - protected boolean testValue(Object v, Object match) { - return (v instanceof Number && match instanceof Number) ? - ((Number) match).doubleValue() == ((Number) v).doubleValue() : - match.equals(v); - } -} diff --git a/main/src/com/google/gridworks/browsing/filters/ExpressionNumberComparisonRowFilter.java b/main/src/com/google/gridworks/browsing/filters/ExpressionNumberComparisonRowFilter.java deleted file mode 100644 index 6fe3d5bda..000000000 --- a/main/src/com/google/gridworks/browsing/filters/ExpressionNumberComparisonRowFilter.java +++ /dev/null @@ -1,102 +0,0 @@ -package com.google.gridworks.browsing.filters; - -import java.util.Collection; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; - -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.browsing.util.RowEvaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -/** - * Judge if a row matches by evaluating a given expression on the row, based on a particular - * column, and checking the result. It's a match if the result satisfies some numeric comparisons, - * or if the result is non-numeric or blank or error and we want non-numeric or blank or error - * values. 
- */ -abstract public class ExpressionNumberComparisonRowFilter implements RowFilter { - final protected RowEvaluable _rowEvaluable; - final protected boolean _selectNumeric; - final protected boolean _selectNonNumeric; - final protected boolean _selectBlank; - final protected boolean _selectError; - - public ExpressionNumberComparisonRowFilter( - RowEvaluable rowEvaluable, - boolean selectNumeric, - boolean selectNonNumeric, - boolean selectBlank, - boolean selectError - ) { - _rowEvaluable = rowEvaluable; - _selectNumeric = selectNumeric; - _selectNonNumeric = selectNonNumeric; - _selectBlank = selectBlank; - _selectError = selectError; - } - - public boolean filterRow(Project project, int rowIndex, Row row) { - Properties bindings = ExpressionUtils.createBindings(project); - - Object value = _rowEvaluable.eval(project, rowIndex, row, bindings); - if (value != null) { - if (value.getClass().isArray()) { - Object[] a = (Object[]) value; - for (Object v : a) { - if (checkValue(v)) { - return true; - } - } - return false; - } else if (value instanceof Collection) { - for (Object v : ExpressionUtils.toObjectCollection(value)) { - if (checkValue(v)) { - return true; - } - } - return false; - } else if (value instanceof JSONArray) { - JSONArray a = (JSONArray) value; - int l = a.length(); - - for (int i = 0; i < l; i++) { - try { - if (checkValue(a.get(i))) { - return true; - } - } catch (JSONException e) { - // ignore - } - } - return false; - } // else, fall through - } - - return checkValue(value); - } - - protected boolean checkValue(Object v) { - if (ExpressionUtils.isError(v)) { - return _selectError; - } else if (ExpressionUtils.isNonBlankData(v)) { - if (v instanceof Number) { - double d = ((Number) v).doubleValue(); - if (Double.isInfinite(d) || Double.isNaN(d)) { - return _selectError; - } else { - return _selectNumeric && checkValue(d); - } - } else { - return _selectNonNumeric; - } - } else { - return _selectBlank; - } - } - - abstract protected boolean 
checkValue(double d); -} diff --git a/main/src/com/google/gridworks/browsing/filters/ExpressionStringComparisonRowFilter.java b/main/src/com/google/gridworks/browsing/filters/ExpressionStringComparisonRowFilter.java deleted file mode 100644 index 41f013d3f..000000000 --- a/main/src/com/google/gridworks/browsing/filters/ExpressionStringComparisonRowFilter.java +++ /dev/null @@ -1,77 +0,0 @@ -package com.google.gridworks.browsing.filters; - -import java.util.Collection; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; - -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -/** - * Judge if a row matches by evaluating a given expression on the row, based on a particular - * column, and checking the result. It's a match if the result satisfies some string comparisons. - */ -abstract public class ExpressionStringComparisonRowFilter implements RowFilter { - final protected Evaluable _evaluable; - final protected String _columnName; - final protected int _cellIndex; - - public ExpressionStringComparisonRowFilter(Evaluable evaluable, String columnName, int cellIndex) { - _evaluable = evaluable; - _columnName = columnName; - _cellIndex = cellIndex; - } - - public boolean filterRow(Project project, int rowIndex, Row row) { - Cell cell = _cellIndex < 0 ? null : row.getCell(_cellIndex); - - Properties bindings = ExpressionUtils.createBindings(project); - ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); - - Object value = _evaluable.evaluate(bindings); - if (value != null) { - if (value.getClass().isArray()) { - Object[] a = (Object[]) value; - for (Object v : a) { - if (checkValue(v instanceof String ? 
((String) v) : v.toString())) { - return true; - } - } - } else if (value instanceof Collection) { - for (Object v : ExpressionUtils.toObjectCollection(value)) { - if (checkValue(v.toString())) { - return true; - } - } - return false; - } else if (value instanceof JSONArray) { - JSONArray a = (JSONArray) value; - int l = a.length(); - - for (int i = 0; i < l; i++) { - try { - if (checkValue(a.get(i).toString())) { - return true; - } - } catch (JSONException e) { - // ignore - } - } - return false; - } else { - if (checkValue(value instanceof String ? ((String) value) : value.toString())) { - return true; - } - } - } - return false; - } - - abstract protected boolean checkValue(String s); -} diff --git a/main/src/com/google/gridworks/browsing/filters/ExpressionTimeComparisonRowFilter.java b/main/src/com/google/gridworks/browsing/filters/ExpressionTimeComparisonRowFilter.java deleted file mode 100644 index 575fe19aa..000000000 --- a/main/src/com/google/gridworks/browsing/filters/ExpressionTimeComparisonRowFilter.java +++ /dev/null @@ -1,52 +0,0 @@ -package com.google.gridworks.browsing.filters; - -import java.util.Date; - -import com.google.gridworks.browsing.util.RowEvaluable; -import com.google.gridworks.expr.ExpressionUtils; - -/** - * Judge if a row matches by evaluating a given expression on the row, based on a particular - * column, and checking the result. It's a match if the result satisfies some time comparisons, - * or if the result is not a time or blank or error and we want non-time or blank or error - * values. 
- */ -abstract public class ExpressionTimeComparisonRowFilter extends ExpressionNumberComparisonRowFilter { - - final protected boolean _selectTime; - final protected boolean _selectNonTime; - - public ExpressionTimeComparisonRowFilter( - RowEvaluable rowEvaluable, - boolean selectTime, - boolean selectNonTime, - boolean selectBlank, - boolean selectError - ) { - super(rowEvaluable, selectTime, selectNonTime, selectBlank, selectError); - _selectTime = selectTime; - _selectNonTime = selectNonTime; - } - - protected boolean checkValue(Object v) { - if (ExpressionUtils.isError(v)) { - return _selectError; - } else if (ExpressionUtils.isNonBlankData(v)) { - if (v instanceof Date) { - long time = ((Date) v).getTime(); - return _selectTime && checkValue(time); - } else { - return _selectNonTime; - } - } else { - return _selectBlank; - } - } - - // not really needed for operation, just to make extending the abstract class possible - protected boolean checkValue(double d) { - return false; - } - - abstract protected boolean checkValue(long d); -} diff --git a/main/src/com/google/gridworks/browsing/util/ConjunctiveFilteredRecords.java b/main/src/com/google/gridworks/browsing/util/ConjunctiveFilteredRecords.java deleted file mode 100644 index 87be90871..000000000 --- a/main/src/com/google/gridworks/browsing/util/ConjunctiveFilteredRecords.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.LinkedList; -import java.util.List; - -import com.google.gridworks.browsing.FilteredRecords; -import com.google.gridworks.browsing.RecordFilter; -import com.google.gridworks.browsing.RecordVisitor; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; - -/** - * Encapsulate logic for visiting records that match all given record filters. 
- */ -public class ConjunctiveFilteredRecords implements FilteredRecords { - final protected List _recordFilters = new LinkedList(); - - public void add(RecordFilter recordFilter) { - _recordFilters.add(recordFilter); - } - - @Override - public void accept(Project project, RecordVisitor visitor) { - try { - visitor.start(project); - - int c = project.recordModel.getRecordCount(); - for (int r = 0; r < c; r++) { - Record record = project.recordModel.getRecord(r); - if (matchRecord(project, record)) { - if (visitor.visit(project, record)) { - return; - } - } - } - } finally { - visitor.end(project); - } - } - - protected boolean matchRecord(Project project, Record record) { - for (RecordFilter recordFilter : _recordFilters) { - if (!recordFilter.filterRecord(project, record)) { - return false; - } - } - return true; - } -} diff --git a/main/src/com/google/gridworks/browsing/util/ConjunctiveFilteredRows.java b/main/src/com/google/gridworks/browsing/util/ConjunctiveFilteredRows.java deleted file mode 100644 index 7d948402d..000000000 --- a/main/src/com/google/gridworks/browsing/util/ConjunctiveFilteredRows.java +++ /dev/null @@ -1,51 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.LinkedList; -import java.util.List; - -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowFilter; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -/** - * Encapsulate logic for visiting rows that match all give row filters. Also visit - * context rows and dependent rows if configured so. 
- */ -public class ConjunctiveFilteredRows implements FilteredRows { - final protected List _rowFilters = new LinkedList(); - - public void add(RowFilter rowFilter) { - _rowFilters.add(rowFilter); - } - - public void accept(Project project, RowVisitor visitor) { - try { - visitor.start(project); - - int c = project.rows.size(); - for (int rowIndex = 0; rowIndex < c; rowIndex++) { - Row row = project.rows.get(rowIndex); - if (matchRow(project, rowIndex, row)) { - visitRow(project, visitor, rowIndex, row); - } - } - } finally { - visitor.end(project); - } - } - - protected void visitRow(Project project, RowVisitor visitor, int rowIndex, Row row) { - visitor.visit(project, rowIndex, row); - } - - protected boolean matchRow(Project project, int rowIndex, Row row) { - for (RowFilter rowFilter : _rowFilters) { - if (!rowFilter.filterRow(project, rowIndex, row)) { - return false; - } - } - return true; - } -} diff --git a/main/src/com/google/gridworks/browsing/util/ExpressionBasedRowEvaluable.java b/main/src/com/google/gridworks/browsing/util/ExpressionBasedRowEvaluable.java deleted file mode 100644 index 13735ebca..000000000 --- a/main/src/com/google/gridworks/browsing/util/ExpressionBasedRowEvaluable.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.Properties; - -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class ExpressionBasedRowEvaluable implements RowEvaluable { - final protected String _columnName; - final protected int _cellIndex; - final protected Evaluable _eval; - - public ExpressionBasedRowEvaluable( - String columnName, int cellIndex, Evaluable eval) { - - _columnName = columnName; - _cellIndex = cellIndex; - _eval = eval; - } - - @Override - public Object eval( - Project project, int rowIndex, Row row, Properties bindings) { 
- - Cell cell = row.getCell(_cellIndex); - - ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); - - return _eval.evaluate(bindings); - } -} diff --git a/main/src/com/google/gridworks/browsing/util/ExpressionNominalValueGrouper.java b/main/src/com/google/gridworks/browsing/util/ExpressionNominalValueGrouper.java deleted file mode 100644 index 69911f649..000000000 --- a/main/src/com/google/gridworks/browsing/util/ExpressionNominalValueGrouper.java +++ /dev/null @@ -1,211 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import com.google.gridworks.browsing.DecoratedValue; -import com.google.gridworks.browsing.RecordVisitor; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.browsing.facets.NominalFacetChoice; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.model.Row; - -/** - * Visit matched rows or records and group them into facet choices based on the values computed - * from a given expression. 
- */ -public class ExpressionNominalValueGrouper implements RowVisitor, RecordVisitor { - static public class IndexedNominalFacetChoice extends NominalFacetChoice { - int _latestIndex; - - public IndexedNominalFacetChoice(DecoratedValue decoratedValue, int latestIndex) { - super(decoratedValue); - _latestIndex = latestIndex; - } - } - - /* - * Configuration - */ - final protected Evaluable _evaluable; - final protected String _columnName; - final protected int _cellIndex; - - /* - * Computed results - */ - final public Map choices = new HashMap(); - public int blankCount = 0; - public int errorCount = 0; - - /* - * Scratch pad variables - */ - protected boolean hasBlank; - protected boolean hasError; - - public ExpressionNominalValueGrouper(Evaluable evaluable, String columnName, int cellIndex) { - _evaluable = evaluable; - _columnName = columnName; - _cellIndex = cellIndex; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - hasError = false; - hasBlank = false; - - Properties bindings = ExpressionUtils.createBindings(project); - - visitRow(project, rowIndex, row, bindings, rowIndex); - - if (hasError) { - errorCount++; - } - if (hasBlank) { - blankCount++; - } - - return false; - } - - @Override - public boolean visit(Project project, Record record) { - hasError = false; - hasBlank = false; - - Properties bindings = ExpressionUtils.createBindings(project); - - for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { - Row row = project.rows.get(r); - visitRow(project, r, row, bindings, record.recordIndex); - } - - if (hasError) { - errorCount++; - } - if (hasBlank) { - blankCount++; - } - - return false; - } - - protected void visitRow(Project project, int rowIndex, Row row, Properties bindings, int index) { - Object value = evalRow(project, rowIndex, row, bindings); - if (value != null) 
{ - if (value.getClass().isArray()) { - Object[] a = (Object[]) value; - for (Object v : a) { - processValue(v, rowIndex); - } - } else if (value instanceof Collection) { - for (Object v : ExpressionUtils.toObjectCollection(value)) { - processValue(v, rowIndex); - } - } else { - processValue(value, rowIndex); - } - } else { - processValue(value, rowIndex); - } - } - - protected Object evalRow(Project project, int rowIndex, Row row, Properties bindings) { - Cell cell = _cellIndex < 0 ? null : row.getCell(_cellIndex); - - ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); - - return _evaluable.evaluate(bindings); - } - - protected void processValue(Object value, int index) { - if (ExpressionUtils.isError(value)) { - hasError = true; - } else if (ExpressionUtils.isNonBlankData(value)) { - String valueString = value.toString(); - IndexedNominalFacetChoice facetChoice = choices.get(valueString); - - if (facetChoice != null) { - if (facetChoice._latestIndex < index) { - facetChoice._latestIndex = index; - facetChoice.count++; - } - } else { - String label = value.toString(); - DecoratedValue dValue = new DecoratedValue(value, label); - IndexedNominalFacetChoice choice = - new IndexedNominalFacetChoice(dValue, index); - - choice.count = 1; - choices.put(valueString, choice); - } - } else { - hasBlank = true; - } - } - - public RowEvaluable getChoiceCountRowEvaluable() { - return new RowEvaluable() { - @Override - public Object eval(Project project, int rowIndex, Row row, Properties bindings) { - Object value = evalRow(project, rowIndex, row, bindings); - return getChoiceValueCountMultiple(value); - } - - }; - } - - public Object getChoiceValueCountMultiple(Object value) { - if (value != null) { - if (value.getClass().isArray()) { - Object[] choiceValues = (Object[]) value; - List counts = new ArrayList(choiceValues.length); - - for (int i = 0; i < choiceValues.length; i++) { - counts.add(getChoiceValueCount(choiceValues[i])); - } - return counts; - } else 
if (value instanceof Collection) { - List choiceValues = ExpressionUtils.toObjectList(value); - List counts = new ArrayList(choiceValues.size()); - - int count = choiceValues.size(); - for (int i = 0; i < count; i++) { - counts.add(getChoiceValueCount(choiceValues.get(i))); - } - return counts; - } - } - - return getChoiceValueCount(value); - } - - public Integer getChoiceValueCount(Object choiceValue) { - if (ExpressionUtils.isError(choiceValue)) { - return errorCount; - } else if (ExpressionUtils.isNonBlankData(choiceValue)) { - IndexedNominalFacetChoice choice = choices.get(choiceValue); - return choice != null ? choice.count : 0; - } else { - return blankCount; - } - } -} diff --git a/main/src/com/google/gridworks/browsing/util/ExpressionNumericValueBinner.java b/main/src/com/google/gridworks/browsing/util/ExpressionNumericValueBinner.java deleted file mode 100644 index 507e5777e..000000000 --- a/main/src/com/google/gridworks/browsing/util/ExpressionNumericValueBinner.java +++ /dev/null @@ -1,148 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.Collection; -import java.util.Properties; - -import com.google.gridworks.browsing.RecordVisitor; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.model.Row; - -/** - * Visit matched rows or records and slot them into bins based on the numbers computed - * from a given expression. 
- */ -public class ExpressionNumericValueBinner implements RowVisitor, RecordVisitor { - /* - * Configuration - */ - final protected RowEvaluable _rowEvaluable; - final protected NumericBinIndex _index; // base bins - - /* - * Computed results - */ - final public int[] bins; - public int numericCount; - public int nonNumericCount; - public int blankCount; - public int errorCount; - - /* - * Scratchpad variables - */ - protected boolean hasError; - protected boolean hasBlank; - protected boolean hasNumeric; - protected boolean hasNonNumeric; - - public ExpressionNumericValueBinner(RowEvaluable rowEvaluable, NumericBinIndex index) { - _rowEvaluable = rowEvaluable; - _index = index; - bins = new int[_index.getBins().length]; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - resetFlags(); - - Properties bindings = ExpressionUtils.createBindings(project); - processRow(project, rowIndex, row, bindings); - - updateCounts(); - - return false; - } - - @Override - public boolean visit(Project project, Record record) { - resetFlags(); - - Properties bindings = ExpressionUtils.createBindings(project); - for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { - processRow(project, r, project.rows.get(r), bindings); - } - - updateCounts(); - - return false; - } - - protected void resetFlags() { - hasError = false; - hasBlank = false; - hasNumeric = false; - hasNonNumeric = false; - } - - protected void updateCounts() { - if (hasError) { - errorCount++; - } - if (hasBlank) { - blankCount++; - } - if (hasNumeric) { - numericCount++; - } - if (hasNonNumeric) { - nonNumericCount++; - } - } - - protected void processRow(Project project, int rowIndex, Row row, Properties bindings) { - Object value = _rowEvaluable.eval(project, rowIndex, row, bindings); - if (value != null) { - if 
(value.getClass().isArray()) { - Object[] a = (Object[]) value; - for (Object v : a) { - processValue(v); - } - return; - } else if (value instanceof Collection) { - for (Object v : ExpressionUtils.toObjectCollection(value)) { - processValue(v); - } - return; - } // else, fall through - } - - processValue(value); - } - - protected void processValue(Object value) { - if (ExpressionUtils.isError(value)) { - hasError = true; - } else if (ExpressionUtils.isNonBlankData(value)) { - if (value instanceof Number) { - double d = ((Number) value).doubleValue(); - if (!Double.isInfinite(d) && !Double.isNaN(d)) { - hasNumeric = true; - - int bin = (int) Math.floor((d - _index.getMin()) / _index.getStep()); - if (bin >= 0 && bin < bins.length) { // as a precaution - bins[bin]++; - } - } else { - hasError = true; - } - } else { - hasNonNumeric = true; - } - } else { - hasBlank = true; - } - } -} diff --git a/main/src/com/google/gridworks/browsing/util/ExpressionTimeValueBinner.java b/main/src/com/google/gridworks/browsing/util/ExpressionTimeValueBinner.java deleted file mode 100644 index 2a99bb34b..000000000 --- a/main/src/com/google/gridworks/browsing/util/ExpressionTimeValueBinner.java +++ /dev/null @@ -1,146 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.Collection; -import java.util.Date; -import java.util.Properties; - -import com.google.gridworks.browsing.RecordVisitor; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.model.Row; - -/** - * Visit matched rows or records and slot them into bins based on the date computed - * from a given expression. 
- */ -public class ExpressionTimeValueBinner implements RowVisitor, RecordVisitor { - - /* - * Configuration - */ - final protected RowEvaluable _rowEvaluable; - final protected TimeBinIndex _index; // base bins - - /* - * Computed results - */ - final public int[] bins; - public int timeCount; - public int nonTimeCount; - public int blankCount; - public int errorCount; - - /* - * Scratchpad variables - */ - protected boolean hasError; - protected boolean hasBlank; - protected boolean hasTime; - protected boolean hasNonTime; - - public ExpressionTimeValueBinner(RowEvaluable rowEvaluable, TimeBinIndex index) { - _rowEvaluable = rowEvaluable; - _index = index; - bins = new int[_index.getBins().length]; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - resetFlags(); - - Properties bindings = ExpressionUtils.createBindings(project); - processRow(project, rowIndex, row, bindings); - - updateCounts(); - - return false; - } - - @Override - public boolean visit(Project project, Record record) { - resetFlags(); - - Properties bindings = ExpressionUtils.createBindings(project); - for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { - processRow(project, r, project.rows.get(r), bindings); - } - - updateCounts(); - - return false; - } - - protected void resetFlags() { - hasError = false; - hasBlank = false; - hasTime = false; - hasNonTime = false; - } - - protected void updateCounts() { - if (hasError) { - errorCount++; - } - if (hasBlank) { - blankCount++; - } - if (hasTime) { - timeCount++; - } - if (hasNonTime) { - nonTimeCount++; - } - } - - protected void processRow(Project project, int rowIndex, Row row, Properties bindings) { - Object value = _rowEvaluable.eval(project, rowIndex, row, bindings); - if (value != null) { - if (value.getClass().isArray()) { - Object[] a = 
(Object[]) value; - for (Object v : a) { - processValue(v); - } - return; - } else if (value instanceof Collection) { - for (Object v : ExpressionUtils.toObjectCollection(value)) { - processValue(v); - } - return; - } // else, fall through - } - - processValue(value); - } - - protected void processValue(Object value) { - if (ExpressionUtils.isError(value)) { - hasError = true; - } else if (ExpressionUtils.isNonBlankData(value)) { - if (value instanceof Date) { - long t = ((Date) value).getTime(); - hasTime = true; - - int bin = (int) Math.floor((t - _index.getMin()) / _index.getStep()); - if (bin >= 0 && bin < bins.length) { // as a precaution - bins[bin]++; - } - } else { - hasNonTime = true; - } - } else { - hasBlank = true; - } - } -} diff --git a/main/src/com/google/gridworks/browsing/util/FilteredRecordsAsFilteredRows.java b/main/src/com/google/gridworks/browsing/util/FilteredRecordsAsFilteredRows.java deleted file mode 100644 index 3dd67f9d3..000000000 --- a/main/src/com/google/gridworks/browsing/util/FilteredRecordsAsFilteredRows.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.google.gridworks.browsing.util; - -import com.google.gridworks.browsing.FilteredRecords; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.model.Project; - -public class FilteredRecordsAsFilteredRows implements FilteredRows { - final protected FilteredRecords _filteredRecords; - - public FilteredRecordsAsFilteredRows(FilteredRecords filteredRecords) { - _filteredRecords = filteredRecords; - } - - @Override - public void accept(Project project, RowVisitor visitor) { - _filteredRecords.accept(project, new RowVisitorAsRecordVisitor(visitor)); - } - -} diff --git a/main/src/com/google/gridworks/browsing/util/NumericBinIndex.java b/main/src/com/google/gridworks/browsing/util/NumericBinIndex.java deleted file mode 100644 index 4e4c8d1d5..000000000 --- 
a/main/src/com/google/gridworks/browsing/util/NumericBinIndex.java +++ /dev/null @@ -1,226 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -/** - * A utility class for computing the base bins that form the base histograms of - * numeric range facets. It evaluates an expression on all the rows of a project to - * get numeric values, determines how many bins to distribute those values in, and - * bins the rows accordingly. - * - * This class processes all rows rather than just the filtered rows because it - * needs to compute the base bins of a numeric range facet, which remain unchanged - * as the user interacts with the facet. - */ -abstract public class NumericBinIndex { - - protected int _totalValueCount; - protected int _numbericValueCount; - protected double _min; - protected double _max; - protected double _step; - protected int[] _bins; - - protected int _numericRowCount; - protected int _nonNumericRowCount; - protected int _blankRowCount; - protected int _errorRowCount; - - protected boolean _hasError = false; - protected boolean _hasNonNumeric = false; - protected boolean _hasNumeric = false; - protected boolean _hasBlank = false; - - abstract protected void iterate(Project project, RowEvaluable rowEvaluable, List allValues); - - public NumericBinIndex(Project project, RowEvaluable rowEvaluable) { - _min = Double.POSITIVE_INFINITY; - _max = Double.NEGATIVE_INFINITY; - - List allValues = new ArrayList(); - - iterate(project, rowEvaluable, allValues); - - _numbericValueCount = allValues.size(); - - if (_min >= _max) { - _step = 1; - _min = Math.min(_min, _max); - _max = _step; - _bins = new int[1]; - - return; - } - - double diff = _max - _min; - - _step = 1; - if (diff > 10) { - while (_step * 100 < 
diff) { - _step *= 10; - } - } else { - while (_step * 100 > diff) { - _step /= 10; - } - } - - double originalMax = _max; - _min = (Math.floor(_min / _step) * _step); - _max = (Math.ceil(_max / _step) * _step); - - double binCount = (_max - _min) / _step; - if (binCount > 100) { - _step *= 2; - binCount = (binCount + 1) / 2; - } - - if (_max <= originalMax) { - _max += _step; - binCount++; - } - - _bins = new int[(int) Math.round(binCount)]; - for (double d : allValues) { - int bin = Math.max((int) Math.floor((d - _min) / _step),0); - _bins[bin]++; - } - } - - public boolean isNumeric() { - return _numbericValueCount > _totalValueCount / 2; - } - - public double getMin() { - return _min; - } - - public double getMax() { - return _max; - } - - public double getStep() { - return _step; - } - - public int[] getBins() { - return _bins; - } - - public int getNumericRowCount() { - return _numericRowCount; - } - - public int getNonNumericRowCount() { - return _nonNumericRowCount; - } - - public int getBlankRowCount() { - return _blankRowCount; - } - - public int getErrorRowCount() { - return _errorRowCount; - } - - protected void processRow( - Project project, - RowEvaluable rowEvaluable, - List allValues, - int rowIndex, - Row row, - Properties bindings - ) { - Object value = rowEvaluable.eval(project, rowIndex, row, bindings); - - if (ExpressionUtils.isError(value)) { - _hasError = true; - } else if (ExpressionUtils.isNonBlankData(value)) { - if (value.getClass().isArray()) { - Object[] a = (Object[]) value; - for (Object v : a) { - _totalValueCount++; - - if (ExpressionUtils.isError(v)) { - _hasError = true; - } else if (ExpressionUtils.isNonBlankData(v)) { - if (v instanceof Number) { - _hasNumeric = true; - processValue(((Number) v).doubleValue(), allValues); - } else { - _hasNonNumeric = true; - } - } else { - _hasBlank = true; - } - } - } else if (value instanceof Collection) { - for (Object v : ExpressionUtils.toObjectCollection(value)) { - _totalValueCount++; - 
- if (ExpressionUtils.isError(v)) { - _hasError = true; - } else if (ExpressionUtils.isNonBlankData(v)) { - if (v instanceof Number) { - _hasNumeric = true; - processValue(((Number) v).doubleValue(), allValues); - } else { - _hasNonNumeric = true; - } - } else { - _hasBlank = true; - } - } - } else { - _totalValueCount++; - - if (value instanceof Number) { - _hasNumeric = true; - processValue(((Number) value).doubleValue(), allValues); - } else { - _hasNonNumeric = true; - } - } - } else { - _hasBlank = true; - } - } - - protected void preprocessing() { - _hasBlank = false; - _hasError = false; - _hasNonNumeric = false; - _hasNumeric = false; - } - - protected void postprocessing() { - if (_hasError) { - _errorRowCount++; - } - if (_hasBlank) { - _blankRowCount++; - } - if (_hasNumeric) { - _numericRowCount++; - } - if (_hasNonNumeric) { - _nonNumericRowCount++; - } - } - - protected void processValue(double v, List allValues) { - if (!Double.isInfinite(v) && !Double.isNaN(v)) { - _min = Math.min(_min, v); - _max = Math.max(_max, v); - allValues.add(v); - } - } - -} diff --git a/main/src/com/google/gridworks/browsing/util/NumericBinRecordIndex.java b/main/src/com/google/gridworks/browsing/util/NumericBinRecordIndex.java deleted file mode 100644 index 1057bc57e..000000000 --- a/main/src/com/google/gridworks/browsing/util/NumericBinRecordIndex.java +++ /dev/null @@ -1,38 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.model.Row; - -public class NumericBinRecordIndex extends NumericBinIndex { - public NumericBinRecordIndex(Project project, RowEvaluable rowEvaluable) { - super(project, rowEvaluable); - } - - @Override - protected void iterate( - Project project, RowEvaluable rowEvaluable, List allValues) { - - Properties bindings = 
ExpressionUtils.createBindings(project); - int count = project.recordModel.getRecordCount(); - - for (int r = 0; r < count; r++) { - Record record = project.recordModel.getRecord(r); - - preprocessing(); - - for (int i = record.fromRowIndex; i < record.toRowIndex; i++) { - Row row = project.rows.get(i); - - processRow(project, rowEvaluable, allValues, i, row, bindings); - } - - postprocessing(); - } - } - -} diff --git a/main/src/com/google/gridworks/browsing/util/NumericBinRowIndex.java b/main/src/com/google/gridworks/browsing/util/NumericBinRowIndex.java deleted file mode 100644 index 432dbccce..000000000 --- a/main/src/com/google/gridworks/browsing/util/NumericBinRowIndex.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class NumericBinRowIndex extends NumericBinIndex { - public NumericBinRowIndex(Project project, RowEvaluable rowEvaluable) { - - super(project, rowEvaluable); - } - - @Override - protected void iterate( - Project project, RowEvaluable rowEvaluable, List allValues) { - - Properties bindings = ExpressionUtils.createBindings(project); - - for (int i = 0; i < project.rows.size(); i++) { - Row row = project.rows.get(i); - - preprocessing(); - - processRow(project, rowEvaluable, allValues, i, row, bindings); - - postprocessing(); - } - } - -} diff --git a/main/src/com/google/gridworks/browsing/util/RowEvaluable.java b/main/src/com/google/gridworks/browsing/util/RowEvaluable.java deleted file mode 100644 index f13c6bce0..000000000 --- a/main/src/com/google/gridworks/browsing/util/RowEvaluable.java +++ /dev/null @@ -1,10 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.Properties; - -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public interface RowEvaluable { 
- public Object eval(Project project, int rowIndex, Row row, Properties bindings); -} diff --git a/main/src/com/google/gridworks/browsing/util/RowVisitorAsRecordVisitor.java b/main/src/com/google/gridworks/browsing/util/RowVisitorAsRecordVisitor.java deleted file mode 100644 index 3571d155d..000000000 --- a/main/src/com/google/gridworks/browsing/util/RowVisitorAsRecordVisitor.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.google.gridworks.browsing.util; - -import com.google.gridworks.browsing.RecordVisitor; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; - -public class RowVisitorAsRecordVisitor implements RecordVisitor { - final protected RowVisitor _rowVisitor; - - public RowVisitorAsRecordVisitor(RowVisitor rowVisitor) { - _rowVisitor = rowVisitor; - } - - @Override - public void start(Project project) { - _rowVisitor.start(project); - } - - @Override - public void end(Project project) { - _rowVisitor.end(project); - } - - @Override - public boolean visit(Project project, Record record) { - for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { - if (_rowVisitor.visit(project, r, project.rows.get(r))) { - return true; - } - } - return false; - } -} diff --git a/main/src/com/google/gridworks/browsing/util/TimeBinIndex.java b/main/src/com/google/gridworks/browsing/util/TimeBinIndex.java deleted file mode 100644 index 7fb386e10..000000000 --- a/main/src/com/google/gridworks/browsing/util/TimeBinIndex.java +++ /dev/null @@ -1,218 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Date; -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -/** - * A utility class for computing the base bins that form the base histograms of - * temporal range facets. 
It evaluates an expression on all the rows of a project to - * get temporal values, determines how many bins to distribute those values in, and - * bins the rows accordingly. - * - * This class processes all rows rather than just the filtered rows because it - * needs to compute the base bins of a temporal range facet, which remain unchanged - * as the user interacts with the facet. - */ -abstract public class TimeBinIndex { - - protected int _totalValueCount; - protected int _timeValueCount; - protected long _min; - protected long _max; - protected long _step; - protected int[] _bins; - - protected int _timeRowCount; - protected int _nonTimeRowCount; - protected int _blankRowCount; - protected int _errorRowCount; - - protected boolean _hasError = false; - protected boolean _hasNonTime = false; - protected boolean _hasTime = false; - protected boolean _hasBlank = false; - - protected long[] steps = { - 1, // msec - 1000, // sec - 1000*60, // min - 1000*60*60, // hour - 1000*60*60*24, // day - 1000*60*60*24*7, // week - 1000*2629746, // month (average Gregorian year / 12) - 1000*31556952, // year (average Gregorian year) - 1000*31556952*10, // decade - 1000*31556952*100, // century - 1000*31556952*1000, // millennium - }; - - abstract protected void iterate(Project project, RowEvaluable rowEvaluable, List allValues); - - public TimeBinIndex(Project project, RowEvaluable rowEvaluable) { - _min = Long.MAX_VALUE; - _max = Long.MIN_VALUE; - - List allValues = new ArrayList(); - - iterate(project, rowEvaluable, allValues); - - _timeValueCount = allValues.size(); - - if (_min >= _max) { - _step = 1; - _min = Math.min(_min, _max); - _max = _step; - _bins = new int[1]; - - return; - } - - long diff = _max - _min; - - for (int i = 0; i < steps.length; i++) { - _step = steps[i]; - if (diff / _step <= 100) break; - } - - _bins = new int[(int) (diff / _step) + 1]; - for (long d : allValues) { - int bin = (int) Math.max((d - _min) / _step,0); - _bins[bin]++; - } - } - - public 
boolean isTemporal() { - return _timeValueCount > _totalValueCount / 2; - } - - public long getMin() { - return _min; - } - - public long getMax() { - return _max; - } - - public long getStep() { - return _step; - } - - public int[] getBins() { - return _bins; - } - - public int getTimeRowCount() { - return _timeRowCount; - } - - public int getNonTimeRowCount() { - return _nonTimeRowCount; - } - - public int getBlankRowCount() { - return _blankRowCount; - } - - public int getErrorRowCount() { - return _errorRowCount; - } - - protected void processRow( - Project project, - RowEvaluable rowEvaluable, - List allValues, - int rowIndex, - Row row, - Properties bindings - ) { - Object value = rowEvaluable.eval(project, rowIndex, row, bindings); - - if (ExpressionUtils.isError(value)) { - _hasError = true; - } else if (ExpressionUtils.isNonBlankData(value)) { - if (value.getClass().isArray()) { - Object[] a = (Object[]) value; - for (Object v : a) { - _totalValueCount++; - - if (ExpressionUtils.isError(v)) { - _hasError = true; - } else if (ExpressionUtils.isNonBlankData(v)) { - if (v instanceof Date) { - _hasTime = true; - processValue(((Date) v).getTime(), allValues); - } else { - _hasNonTime = true; - } - } else { - _hasBlank = true; - } - } - } else if (value instanceof Collection) { - for (Object v : ExpressionUtils.toObjectCollection(value)) { - _totalValueCount++; - - if (ExpressionUtils.isError(v)) { - _hasError = true; - } else if (ExpressionUtils.isNonBlankData(v)) { - if (v instanceof Date) { - _hasTime = true; - processValue(((Date) v).getTime(), allValues); - } else { - _hasNonTime = true; - } - } else { - _hasBlank = true; - } - } - } else { - _totalValueCount++; - - if (value instanceof Date) { - _hasTime = true; - processValue(((Date) value).getTime(), allValues); - } else { - _hasNonTime = true; - } - } - } else { - _hasBlank = true; - } - } - - protected void preprocessing() { - _hasBlank = false; - _hasError = false; - _hasNonTime = false; - _hasTime = 
false; - } - - protected void postprocessing() { - if (_hasError) { - _errorRowCount++; - } - if (_hasBlank) { - _blankRowCount++; - } - if (_hasTime) { - _timeRowCount++; - } - if (_hasNonTime) { - _nonTimeRowCount++; - } - } - - protected void processValue(long v, List allValues) { - _min = Math.min(_min, v); - _max = Math.max(_max, v); - allValues.add(v); - } - -} diff --git a/main/src/com/google/gridworks/browsing/util/TimeBinRecordIndex.java b/main/src/com/google/gridworks/browsing/util/TimeBinRecordIndex.java deleted file mode 100644 index d83b7a4f7..000000000 --- a/main/src/com/google/gridworks/browsing/util/TimeBinRecordIndex.java +++ /dev/null @@ -1,38 +0,0 @@ -package com.google.gridworks.browsing.util; - -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.model.Row; - -public class TimeBinRecordIndex extends TimeBinIndex { - - public TimeBinRecordIndex(Project project, RowEvaluable rowEvaluable) { - super(project, rowEvaluable); - } - - @Override - protected void iterate(Project project, RowEvaluable rowEvaluable, List allValues) { - - Properties bindings = ExpressionUtils.createBindings(project); - int count = project.recordModel.getRecordCount(); - - for (int r = 0; r < count; r++) { - Record record = project.recordModel.getRecord(r); - - preprocessing(); - - for (int i = record.fromRowIndex; i < record.toRowIndex; i++) { - Row row = project.rows.get(i); - - processRow(project, rowEvaluable, allValues, i, row, bindings); - } - - postprocessing(); - } - } - -} diff --git a/main/src/com/google/gridworks/browsing/util/TimeBinRowIndex.java b/main/src/com/google/gridworks/browsing/util/TimeBinRowIndex.java deleted file mode 100644 index c8c2a9806..000000000 --- a/main/src/com/google/gridworks/browsing/util/TimeBinRowIndex.java +++ /dev/null @@ -1,32 +0,0 @@ -package 
com.google.gridworks.browsing.util; - -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class TimeBinRowIndex extends TimeBinIndex { - - public TimeBinRowIndex(Project project, RowEvaluable rowEvaluable) { - super(project, rowEvaluable); - } - - @Override - protected void iterate(Project project, RowEvaluable rowEvaluable, List allValues) { - - Properties bindings = ExpressionUtils.createBindings(project); - - for (int i = 0; i < project.rows.size(); i++) { - Row row = project.rows.get(i); - - preprocessing(); - - processRow(project, rowEvaluable, allValues, i, row, bindings); - - postprocessing(); - } - } - -} diff --git a/main/src/com/google/gridworks/clustering/Clusterer.java b/main/src/com/google/gridworks/clustering/Clusterer.java deleted file mode 100644 index 385ca9156..000000000 --- a/main/src/com/google/gridworks/clustering/Clusterer.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.google.gridworks.clustering; - -import org.json.JSONObject; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; - -public abstract class Clusterer implements Jsonizable { - - protected Project _project; - protected int _colindex; - protected JSONObject _config; - - public abstract void computeClusters(Engine engine); - - public void initializeFromJSON(Project project, JSONObject o) throws Exception { - _project = project; - _config = o; - - String colname = o.getString("column"); - for (Column column : project.columnModel.columns) { - if (column.getName().equals(colname)) { - _colindex = column.getCellIndex(); - } - } - } -} diff --git a/main/src/com/google/gridworks/clustering/binning/BinningClusterer.java b/main/src/com/google/gridworks/clustering/binning/BinningClusterer.java deleted file mode 100644 index 
18d204273..000000000 --- a/main/src/com/google/gridworks/clustering/binning/BinningClusterer.java +++ /dev/null @@ -1,169 +0,0 @@ -package com.google.gridworks.clustering.binning; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.TreeMap; -import java.util.Map.Entry; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.clustering.Clusterer; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class BinningClusterer extends Clusterer { - - private Keyer _keyer; - - static final protected Map _keyers = new HashMap(); - - final static Logger logger = LoggerFactory.getLogger("binning_clusterer"); - - List> _clusters; - - static { - _keyers.put("fingerprint", new FingerprintKeyer()); - _keyers.put("ngram-fingerprint", new NGramFingerprintKeyer()); - _keyers.put("metaphone", new MetaphoneKeyer()); - _keyers.put("double-metaphone", new DoubleMetaphoneKeyer()); - _keyers.put("soundex", new SoundexKeyer()); - } - - class BinningRowVisitor implements RowVisitor { - - Keyer _keyer; - Object[] _params; - JSONObject _config; - - Map> _map = new HashMap>(); - - public BinningRowVisitor(Keyer k, JSONObject o) { - _keyer = k; - _config = o; - if (k instanceof NGramFingerprintKeyer) { - try { - int size = _config.getJSONObject("params").getInt("ngram-size"); - logger.debug("Using ngram size: {}", size); - _params = new Object[1]; - _params[0] = size; - } catch (JSONException e) { - //Gridworks.warn("No params specified, using default"); - 
} - } - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(_colindex); - if (cell != null && cell.value != null) { - Object v = cell.value; - String s = (v instanceof String) ? ((String) v) : v.toString(); - String key = _keyer.key(s,_params); - if (_map.containsKey(key)) { - Map m = _map.get(key); - if (m.containsKey(s)) { - m.put(s, m.get(s) + 1); - } else { - m.put(s,1); - } - } else { - Map m = new TreeMap(); - m.put(s,1); - _map.put(key, m); - } - } - return false; - } - - public Map> getMap() { - return _map; - } - } - - public static class SizeComparator implements Comparator>, Serializable { - private static final long serialVersionUID = -1390696157208674054L; - public int compare(Map o1, Map o2) { - int s1 = o1.size(); - int s2 = o2.size(); - if (o1 == o2) { - int total1 = 0; - for (int i : o1.values()) { - total1 += i; - } - int total2 = 0; - for (int i : o2.values()) { - total2 += i; - } - return total2 - total1; - } else { - return s2 - s1; - } - } - } - - public static class EntriesComparator implements Comparator>, Serializable { - private static final long serialVersionUID = 2763378036791777964L; - public int compare(Entry o1, Entry o2) { - return o2.getValue() - o1.getValue(); - } - } - - public void initializeFromJSON(Project project, JSONObject o) throws Exception { - super.initializeFromJSON(project, o); - _keyer = _keyers.get(o.getString("function").toLowerCase()); - } - - public void computeClusters(Engine engine) { - BinningRowVisitor visitor = new BinningRowVisitor(_keyer,_config); - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(_project, visitor); - - Map> map = visitor.getMap(); - _clusters = new ArrayList>(map.values()); - Collections.sort(_clusters, new SizeComparator()); - } - - public void write(JSONWriter writer, 
Properties options) throws JSONException { - EntriesComparator c = new EntriesComparator(); - - writer.array(); - for (Map m : _clusters) { - if (m.size() > 1) { - writer.array(); - List> entries = new ArrayList>(m.entrySet()); - Collections.sort(entries,c); - for (Entry e : entries) { - writer.object(); - writer.key("v"); writer.value(e.getKey()); - writer.key("c"); writer.value(e.getValue()); - writer.endObject(); - } - writer.endArray(); - } - } - writer.endArray(); - } -} diff --git a/main/src/com/google/gridworks/clustering/binning/DoubleMetaphoneKeyer.java b/main/src/com/google/gridworks/clustering/binning/DoubleMetaphoneKeyer.java deleted file mode 100644 index dfb453ed8..000000000 --- a/main/src/com/google/gridworks/clustering/binning/DoubleMetaphoneKeyer.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.google.gridworks.clustering.binning; - -import org.apache.commons.codec.language.DoubleMetaphone; - -public class DoubleMetaphoneKeyer extends Keyer { - - private DoubleMetaphone _metaphone2; - - public DoubleMetaphoneKeyer() { - _metaphone2 = new DoubleMetaphone(); - _metaphone2.setMaxCodeLen(2000); - } - - public String key(String s, Object... o) { - return _metaphone2.doubleMetaphone(s); - } - -} diff --git a/main/src/com/google/gridworks/clustering/binning/FingerprintKeyer.java b/main/src/com/google/gridworks/clustering/binning/FingerprintKeyer.java deleted file mode 100644 index c5a9303a7..000000000 --- a/main/src/com/google/gridworks/clustering/binning/FingerprintKeyer.java +++ /dev/null @@ -1,249 +0,0 @@ -package com.google.gridworks.clustering.binning; - -import java.util.Iterator; -import java.util.TreeSet; -import java.util.regex.Pattern; - -import org.apache.commons.lang.StringUtils; - -public class FingerprintKeyer extends Keyer { - - static final Pattern alphanum = Pattern.compile("\\p{Punct}|\\p{Cntrl}"); - - public String key(String s, Object... 
o) { - s = s.trim(); // first off, remove whitespace around the string - s = s.toLowerCase(); // then lowercase it - s = alphanum.matcher(s).replaceAll(""); // then remove all punctuation and control chars - String[] frags = StringUtils.split(s); // split by whitespace - TreeSet set = new TreeSet(); - for (String ss : frags) { - set.add(ss); // order fragments and dedupe - } - StringBuffer b = new StringBuffer(); - Iterator i = set.iterator(); - while (i.hasNext()) { // join ordered fragments back together - b.append(i.next()); - b.append(' '); - } - return asciify(b.toString()); // find ASCII equivalent to characters - } - - protected String asciify(String s) { - char[] c = s.toCharArray(); - StringBuffer b = new StringBuffer(); - for (int i = 0; i < c.length; i++) { - b.append(translate(c[i])); - } - return b.toString(); - } - - /** - * Translate the given unicode char in the closest ASCII representation - * NOTE: this function deals only with latin-1 supplement and latin-1 extended code charts - */ - private char translate(char c) { - switch(c) { - case '\u00C0': - case '\u00C1': - case '\u00C2': - case '\u00C3': - case '\u00C4': - case '\u00C5': - case '\u00E0': - case '\u00E1': - case '\u00E2': - case '\u00E3': - case '\u00E4': - case '\u00E5': - case '\u0100': - case '\u0101': - case '\u0102': - case '\u0103': - case '\u0104': - case '\u0105': - return 'a'; - case '\u00C7': - case '\u00E7': - case '\u0106': - case '\u0107': - case '\u0108': - case '\u0109': - case '\u010A': - case '\u010B': - case '\u010C': - case '\u010D': - return 'c'; - case '\u00D0': - case '\u00F0': - case '\u010E': - case '\u010F': - case '\u0110': - case '\u0111': - return 'd'; - case '\u00C8': - case '\u00C9': - case '\u00CA': - case '\u00CB': - case '\u00E8': - case '\u00E9': - case '\u00EA': - case '\u00EB': - case '\u0112': - case '\u0113': - case '\u0114': - case '\u0115': - case '\u0116': - case '\u0117': - case '\u0118': - case '\u0119': - case '\u011A': - case '\u011B': - 
return 'e'; - case '\u011C': - case '\u011D': - case '\u011E': - case '\u011F': - case '\u0120': - case '\u0121': - case '\u0122': - case '\u0123': - return 'g'; - case '\u0124': - case '\u0125': - case '\u0126': - case '\u0127': - return 'h'; - case '\u00CC': - case '\u00CD': - case '\u00CE': - case '\u00CF': - case '\u00EC': - case '\u00ED': - case '\u00EE': - case '\u00EF': - case '\u0128': - case '\u0129': - case '\u012A': - case '\u012B': - case '\u012C': - case '\u012D': - case '\u012E': - case '\u012F': - case '\u0130': - case '\u0131': - return 'i'; - case '\u0134': - case '\u0135': - return 'j'; - case '\u0136': - case '\u0137': - case '\u0138': - return 'k'; - case '\u0139': - case '\u013A': - case '\u013B': - case '\u013C': - case '\u013D': - case '\u013E': - case '\u013F': - case '\u0140': - case '\u0141': - case '\u0142': - return 'l'; - case '\u00D1': - case '\u00F1': - case '\u0143': - case '\u0144': - case '\u0145': - case '\u0146': - case '\u0147': - case '\u0148': - case '\u0149': - case '\u014A': - case '\u014B': - return 'n'; - case '\u00D2': - case '\u00D3': - case '\u00D4': - case '\u00D5': - case '\u00D6': - case '\u00D8': - case '\u00F2': - case '\u00F3': - case '\u00F4': - case '\u00F5': - case '\u00F6': - case '\u00F8': - case '\u014C': - case '\u014D': - case '\u014E': - case '\u014F': - case '\u0150': - case '\u0151': - return 'o'; - case '\u0154': - case '\u0155': - case '\u0156': - case '\u0157': - case '\u0158': - case '\u0159': - return 'r'; - case '\u015A': - case '\u015B': - case '\u015C': - case '\u015D': - case '\u015E': - case '\u015F': - case '\u0160': - case '\u0161': - case '\u017F': - return 's'; - case '\u0162': - case '\u0163': - case '\u0164': - case '\u0165': - case '\u0166': - case '\u0167': - return 't'; - case '\u00D9': - case '\u00DA': - case '\u00DB': - case '\u00DC': - case '\u00F9': - case '\u00FA': - case '\u00FB': - case '\u00FC': - case '\u0168': - case '\u0169': - case '\u016A': - case '\u016B': - case 
'\u016C': - case '\u016D': - case '\u016E': - case '\u016F': - case '\u0170': - case '\u0171': - case '\u0172': - case '\u0173': - return 'u'; - case '\u0174': - case '\u0175': - return 'w'; - case '\u00DD': - case '\u00FD': - case '\u00FF': - case '\u0176': - case '\u0177': - case '\u0178': - return 'y'; - case '\u0179': - case '\u017A': - case '\u017B': - case '\u017C': - case '\u017D': - case '\u017E': - return 'z'; - } - return c; - } -} diff --git a/main/src/com/google/gridworks/clustering/binning/Keyer.java b/main/src/com/google/gridworks/clustering/binning/Keyer.java deleted file mode 100644 index 2b00206aa..000000000 --- a/main/src/com/google/gridworks/clustering/binning/Keyer.java +++ /dev/null @@ -1,12 +0,0 @@ -package com.google.gridworks.clustering.binning; - - -public abstract class Keyer { - - public String key(String s) { - return this.key(s, (Object[]) null); - } - - public abstract String key(String string, Object... params); - -} diff --git a/main/src/com/google/gridworks/clustering/binning/MetaphoneKeyer.java b/main/src/com/google/gridworks/clustering/binning/MetaphoneKeyer.java deleted file mode 100644 index 93a1e70a6..000000000 --- a/main/src/com/google/gridworks/clustering/binning/MetaphoneKeyer.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.google.gridworks.clustering.binning; - -import org.apache.commons.codec.language.Metaphone; - -public class MetaphoneKeyer extends Keyer { - - private Metaphone _metaphone; - - public MetaphoneKeyer() { - _metaphone = new Metaphone(); - _metaphone.setMaxCodeLen(2000); - } - - public String key(String s, Object... 
o) { - return _metaphone.metaphone(s); - } - -} diff --git a/main/src/com/google/gridworks/clustering/binning/NGramFingerprintKeyer.java b/main/src/com/google/gridworks/clustering/binning/NGramFingerprintKeyer.java deleted file mode 100644 index f28ad026c..000000000 --- a/main/src/com/google/gridworks/clustering/binning/NGramFingerprintKeyer.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.google.gridworks.clustering.binning; - -import java.util.Iterator; -import java.util.TreeSet; -import java.util.regex.Pattern; - -public class NGramFingerprintKeyer extends FingerprintKeyer { - - static final Pattern alphanum = Pattern.compile("\\p{Punct}|\\p{Cntrl}|\\p{Space}"); - - public String key(String s, Object... o) { - int ngram_size = 2; - if (o != null && o.length > 0 && o[0] instanceof Number) { - ngram_size = (Integer) o[0]; - } - s = s.toLowerCase(); // then lowercase it - s = alphanum.matcher(s).replaceAll(""); // then remove all punctuation and control chars - TreeSet set = ngram_split(s,ngram_size); - StringBuffer b = new StringBuffer(); - Iterator i = set.iterator(); - while (i.hasNext()) { // join ordered fragments back together - b.append(i.next()); - } - return asciify(b.toString()); // find ASCII equivalent to characters - } - - protected TreeSet ngram_split(String s, int size) { - TreeSet set = new TreeSet(); - char[] chars = s.toCharArray(); - for (int i = 0; i + size <= chars.length; i++) { - set.add(new String(chars,i,size)); - } - return set; - } -} diff --git a/main/src/com/google/gridworks/clustering/binning/SoundexKeyer.java b/main/src/com/google/gridworks/clustering/binning/SoundexKeyer.java deleted file mode 100644 index 6f9e4f8f8..000000000 --- a/main/src/com/google/gridworks/clustering/binning/SoundexKeyer.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.google.gridworks.clustering.binning; - -import org.apache.commons.codec.language.Soundex; - -public class SoundexKeyer extends Keyer { - - private Soundex _soundex; - - public SoundexKeyer() { - 
_soundex = new Soundex(); - } - - public String key(String s, Object... o) { - return _soundex.soundex(s); - } - -} diff --git a/main/src/com/google/gridworks/clustering/knn/kNNClusterer.java b/main/src/com/google/gridworks/clustering/knn/kNNClusterer.java deleted file mode 100644 index 63b24edc6..000000000 --- a/main/src/com/google/gridworks/clustering/knn/kNNClusterer.java +++ /dev/null @@ -1,211 +0,0 @@ -package com.google.gridworks.clustering.knn; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.Map.Entry; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.clustering.Clusterer; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -import edu.mit.simile.vicino.clustering.NGramClusterer; -import edu.mit.simile.vicino.clustering.VPTreeClusterer; -import edu.mit.simile.vicino.distances.BZip2Distance; -import edu.mit.simile.vicino.distances.Distance; -import edu.mit.simile.vicino.distances.GZipDistance; -import edu.mit.simile.vicino.distances.JaccardDistance; -import edu.mit.simile.vicino.distances.JaroDistance; -import edu.mit.simile.vicino.distances.JaroWinklerDistance; -import edu.mit.simile.vicino.distances.JaroWinklerTFIDFDistance; -import edu.mit.simile.vicino.distances.LevenshteinDistance; -import edu.mit.simile.vicino.distances.PPMDistance; - -public class kNNClusterer extends Clusterer { - - private Distance _distance; - - static final protected Map _distances = new 
HashMap(); - - List> _clusters; - - Map _counts = new HashMap(); - - final static Logger logger = LoggerFactory.getLogger("kNN_clusterer"); - - static { - _distances.put("levenshtein", new LevenshteinDistance()); - _distances.put("jaccard", new JaccardDistance()); - _distances.put("jaro", new JaroDistance()); - _distances.put("jaro-winkler", new JaroWinklerDistance()); - _distances.put("jaro-winkler-tfidf", new JaroWinklerTFIDFDistance()); - _distances.put("gzip", new GZipDistance()); - _distances.put("bzip2", new BZip2Distance()); - _distances.put("ppm", new PPMDistance()); - } - - class VPTreeClusteringRowVisitor implements RowVisitor { - - Distance _distance; - JSONObject _config; - VPTreeClusterer _clusterer; - double _radius = 1.0f; - - public VPTreeClusteringRowVisitor(Distance d, JSONObject o) { - _distance = d; - _config = o; - _clusterer = new VPTreeClusterer(_distance); - try { - JSONObject params = o.getJSONObject("params"); - _radius = params.getDouble("radius"); - } catch (JSONException e) { - //Gridworks.warn("No parameters found, using defaults"); - } - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(_colindex); - if (cell != null && cell.value != null) { - Object v = cell.value; - String s = (v instanceof String) ? 
((String) v) : v.toString(); - _clusterer.populate(s); - count(s); - } - return false; - } - - public List> getClusters() { - return _clusterer.getClusters(_radius); - } - } - - class BlockingClusteringRowVisitor implements RowVisitor { - - Distance _distance; - JSONObject _config; - double _radius = 1.0d; - int _blockingNgramSize = 6; - HashSet _data; - NGramClusterer _clusterer; - - public BlockingClusteringRowVisitor(Distance d, JSONObject o) { - _distance = d; - _config = o; - _data = new HashSet(); - try { - JSONObject params = o.getJSONObject("params"); - _radius = params.getDouble("radius"); - logger.debug("Use radius: {}", _radius); - _blockingNgramSize = params.getInt("blocking-ngram-size"); - logger.debug("Use blocking ngram size: {}",_blockingNgramSize); - } catch (JSONException e) { - logger.debug("No parameters found, using defaults"); - } - _clusterer = new NGramClusterer(_distance, _blockingNgramSize); - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(_colindex); - if (cell != null && cell.value != null) { - Object v = cell.value; - String s = (v instanceof String) ? 
((String) v) : v.toString().intern(); - _clusterer.populate(s); - count(s); - } - return false; - } - - public List> getClusters() { - return _clusterer.getClusters(_radius); - } - } - - public void initializeFromJSON(Project project, JSONObject o) throws Exception { - super.initializeFromJSON(project, o); - _distance = _distances.get(o.getString("function").toLowerCase()); - } - - public void computeClusters(Engine engine) { - //VPTreeClusteringRowVisitor visitor = new VPTreeClusteringRowVisitor(_distance,_config); - BlockingClusteringRowVisitor visitor = new BlockingClusteringRowVisitor(_distance,_config); - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(_project, visitor); - - _clusters = visitor.getClusters(); - } - - public static class ValuesComparator implements Comparator>, Serializable { - private static final long serialVersionUID = 204469656070583155L; - public int compare(Entry o1, Entry o2) { - return o2.getValue() - o1.getValue(); - } - } - - public void write(JSONWriter writer, Properties options) throws JSONException { - writer.array(); - for (Set m : _clusters) { - if (m.size() > 1) { - Map internal_counts = new HashMap(); - for (Serializable s : m) { - internal_counts.put(s,_counts.get(s)); - } - List> values = new ArrayList>(internal_counts.entrySet()); - Collections.sort(values, new ValuesComparator()); - writer.array(); - for (Entry e : values) { - writer.object(); - writer.key("v"); writer.value(e.getKey()); - writer.key("c"); writer.value(e.getValue()); - writer.endObject(); - } - writer.endArray(); - } - } - writer.endArray(); - } - - private void count(Serializable s) { - if (_counts.containsKey(s)) { - _counts.put(s, _counts.get(s) + 1); - } else { - _counts.put(s, 1); - } - } -} diff --git a/main/src/com/google/gridworks/commands/Command.java b/main/src/com/google/gridworks/commands/Command.java deleted file mode 100644 index 59bade277..000000000 --- a/main/src/com/google/gridworks/commands/Command.java +++ 
/dev/null @@ -1,272 +0,0 @@ -package com.google.gridworks.commands; - -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.io.Writer; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.GridworksServlet; -import com.google.gridworks.Jsonizable; -import com.google.gridworks.ProjectManager; -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.Project; -import com.google.gridworks.process.Process; -import com.google.gridworks.util.ParsingUtilities; - -/** - * The super class of all calls that the client side can invoke, most of which - * are AJAX calls. - */ -public abstract class Command { - - final static protected Logger logger = LoggerFactory.getLogger("command"); - - protected GridworksServlet servlet; - - public void init(GridworksServlet servlet) { - this.servlet = servlet; - } - - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - throw new UnsupportedOperationException(); - }; - - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - throw new UnsupportedOperationException(); - }; - - /** - * Utility function to get the browsing engine's configuration as a JSON object - * from the "engine" request parameter, most often in the POST body. 
- * - * @param request - * @return - * @throws JSONException - */ - static protected JSONObject getEngineConfig(HttpServletRequest request) - throws JSONException { - if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); - - String json = request.getParameter("engine"); - try{ - return (json == null) ? null : ParsingUtilities.evaluateJsonStringToObject(json); - } catch (JSONException e){ - logger.debug( json + " could not be parsed to JSON"); - return null; - } - } - - /** - * Utility function to reconstruct the browsing engine from the "engine" request parameter, - * most often in the POST body. - * - * @param request - * @param project - * @return - * @throws Exception - */ - static protected Engine getEngine(HttpServletRequest request, Project project) - throws Exception { - if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); - if (project == null) throw new IllegalArgumentException("parameter 'project' should not be null"); - - Engine engine = new Engine(project); - JSONObject o = getEngineConfig(request); - if (o != null) - engine.initializeFromJSON(o); - return engine; - } - - /** - * Utility method for retrieving the Project object having the ID specified - * in the "project" URL parameter. - * - * @param request - * @return - * @throws ServletException - */ - protected Project getProject(HttpServletRequest request) throws ServletException { - if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); - try { - Project p = ProjectManager.singleton.getProject(Long.parseLong(request.getParameter("project"))); - if (p != null) { - return p; - } - } catch (Exception e) { - // ignore - } - throw new ServletException("Can't find project: missing or bad URL parameter"); - } - - /** - * Utility method for retrieving the ProjectMetadata object having the ID specified - * in the "project" URL parameter. 
- * - * @param request - * @return - * @throws ServletException - */ - protected ProjectMetadata getProjectMetadata(HttpServletRequest request) throws ServletException { - if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); - try { - ProjectMetadata pm = ProjectManager.singleton.getProjectMetadata(Long.parseLong(request.getParameter("project"))); - if (pm != null) { - return pm; - } - } catch (Exception e) { - // ignore - } - throw new ServletException("Can't find project metadata: missing or bad URL parameter"); - } - - static protected int getIntegerParameter(HttpServletRequest request, String name, int def) { - if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); - try { - return Integer.parseInt(request.getParameter(name)); - } catch (Exception e) { - // ignore - } - return def; - } - - static protected JSONObject getJsonParameter(HttpServletRequest request, String name) { - if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); - String value = request.getParameter(name); - if (value != null) { - try { - return ParsingUtilities.evaluateJsonStringToObject(value); - } catch (JSONException e) { - logger.warn("error getting json parameter",e); - } - } - return null; - } - - static protected void performProcessAndRespond( - HttpServletRequest request, - HttpServletResponse response, - Project project, - Process process - ) throws Exception { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - HistoryEntry historyEntry = project.processManager.queueProcess(process); - if (historyEntry != null) { - Writer w = response.getWriter(); - JSONWriter writer = new JSONWriter(w); - Properties options = new Properties(); - - writer.object(); - writer.key("code"); writer.value("ok"); - writer.key("historyEntry"); historyEntry.write(writer, options); - writer.endObject(); - - w.flush(); - 
w.close(); - } else { - respond(response, "{ \"code\" : \"pending\" }"); - } - } - - static protected void respond(HttpServletResponse response, String content) - throws IOException, ServletException { - - response.setCharacterEncoding("UTF-8"); - response.setStatus(HttpServletResponse.SC_OK); - Writer w = response.getWriter(); - if (w != null) { - w.write(content); - w.flush(); - w.close(); - } else { - throw new ServletException("response returned a null writer"); - } - } - - static protected void respond(HttpServletResponse response, String status, String message) - throws IOException, JSONException { - - Writer w = response.getWriter(); - JSONWriter writer = new JSONWriter(w); - writer.object(); - writer.key("status"); writer.value(status); - writer.key("message"); writer.value(message); - writer.endObject(); - w.flush(); - w.close(); - } - - static protected void respondJSON(HttpServletResponse response, Jsonizable o) - throws IOException, JSONException { - - respondJSON(response, o, new Properties()); - } - - static protected void respondJSON( - HttpServletResponse response, Jsonizable o, Properties options) - throws IOException, JSONException { - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - Writer w = response.getWriter(); - JSONWriter writer = new JSONWriter(w); - - o.write(writer, options); - w.flush(); - w.close(); - } - - static protected void respondException(HttpServletResponse response, Exception e) - throws IOException, ServletException { - - logger.warn("Exception caught", e); - - if (response == null) { - throw new ServletException("Response object can't be null"); - } - - try { - JSONObject o = new JSONObject(); - o.put("code", "error"); - o.put("message", e.getMessage()); - - StringWriter sw = new StringWriter(); - PrintWriter pw = new PrintWriter(sw); - e.printStackTrace(pw); - pw.flush(); - sw.flush(); - - o.put("stack", sw.toString()); - - response.setCharacterEncoding("UTF-8"); - 
response.setHeader("Content-Type", "application/json"); - respond(response, o.toString()); - } catch (JSONException e1) { - e.printStackTrace(response.getWriter()); - } - } - - static protected void redirect(HttpServletResponse response, String url) throws IOException { - response.sendRedirect(url); - } - -} diff --git a/main/src/com/google/gridworks/commands/EngineDependentCommand.java b/main/src/com/google/gridworks/commands/EngineDependentCommand.java deleted file mode 100644 index d8d3c91e7..000000000 --- a/main/src/com/google/gridworks/commands/EngineDependentCommand.java +++ /dev/null @@ -1,56 +0,0 @@ -package com.google.gridworks.commands; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONObject; - -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.process.Process; - -/** - * Convenient super class for commands that perform abstract operations on - * only the filtered rows based on the faceted browsing engine's configuration - * on the client side. - * - * The engine's configuration is passed over as a POST body parameter. It is - * retrieved, de-serialized, and used to construct the abstract operation. - * The operation is then used to construct a process. The process is then - * queued for execution. If the process is not long running and there is no - * other queued process, then it gets executed right away, resulting in some - * change to the history. Otherwise, it is pending. The client side can - * decide how to update its UI depending on whether the process is done or - * still pending. - * - * Note that there are interactions on the client side that change only - * individual cells or individual rows (such as starring one row or editing - * the text of one cell). 
These interactions do not depend on the faceted - * browsing engine's configuration, and so they don't invoke commands that - * subclass this class. See AnnotateOneRowCommand and EditOneCellCommand as - * examples. - */ -abstract public class EngineDependentCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - - AbstractOperation op = createOperation(project, request, getEngineConfig(request)); - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } catch (Exception e) { - respondException(response, e); - } - } - - abstract protected AbstractOperation createOperation( - Project project, HttpServletRequest request, JSONObject engineConfig) throws Exception; -} diff --git a/main/src/com/google/gridworks/commands/GetAllPreferencesCommand.java b/main/src/com/google/gridworks/commands/GetAllPreferencesCommand.java deleted file mode 100644 index 001d66cb7..000000000 --- a/main/src/com/google/gridworks/commands/GetAllPreferencesCommand.java +++ /dev/null @@ -1,48 +0,0 @@ -package com.google.gridworks.commands; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.model.Project; -import com.google.gridworks.preference.PreferenceStore; - -public class GetAllPreferencesCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - Project project = request.getParameter("project") != null ? getProject(request) : null; - PreferenceStore ps = project != null ? 
- project.getMetadata().getPreferenceStore() : - ProjectManager.singleton.getPreferenceStore(); - - try { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - JSONWriter writer = new JSONWriter(response.getWriter()); - - writer.object(); - - for (String key : ps.getKeys()) { - Object pref = ps.get(key); - if (pref == null || pref instanceof String || pref instanceof Number || pref instanceof Boolean) { - writer.key(key); - writer.value(pref); - } - } - - writer.endObject(); - } catch (JSONException e) { - respondException(response, e); - } - } - -} diff --git a/main/src/com/google/gridworks/commands/GetPreferenceCommand.java b/main/src/com/google/gridworks/commands/GetPreferenceCommand.java deleted file mode 100644 index b3a42f8fd..000000000 --- a/main/src/com/google/gridworks/commands/GetPreferenceCommand.java +++ /dev/null @@ -1,54 +0,0 @@ -package com.google.gridworks.commands; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.model.Project; -import com.google.gridworks.preference.PreferenceStore; -import com.google.gridworks.preference.TopList; - -public class GetPreferenceCommand extends Command { - @Override - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - Project project = request.getParameter("project") != null ? getProject(request) : null; - PreferenceStore ps = project != null ? 
- project.getMetadata().getPreferenceStore() : - ProjectManager.singleton.getPreferenceStore(); - - String prefName = request.getParameter("name"); - Object pref = ps.get(prefName); - - try { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - JSONWriter writer = new JSONWriter(response.getWriter()); - - writer.object(); - writer.key("value"); - if (pref == null || pref instanceof String || pref instanceof Number || pref instanceof Boolean) { - writer.value(pref); - } else if (pref instanceof TopList) { - TopList tl = (TopList) pref; - tl.write(writer, new Properties()); - } else { - writer.value(pref.toString()); - } - - writer.endObject(); - } catch (JSONException e) { - respondException(response, e); - } - } - -} diff --git a/main/src/com/google/gridworks/commands/OpenWorkspaceDirCommand.java b/main/src/com/google/gridworks/commands/OpenWorkspaceDirCommand.java deleted file mode 100644 index 2352e910e..000000000 --- a/main/src/com/google/gridworks/commands/OpenWorkspaceDirCommand.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.google.gridworks.commands; - -import java.io.File; -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.io.FileProjectManager; - -public class OpenWorkspaceDirCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - String serverName = request.getServerName(); - - if (!"127.0.0.1".equals(serverName) && !"localhost".equals(serverName)) { - respond(response, "{ \"code\" : \"error\", \"message\" : \"Workspace directory can only be opened on the local machine where Gridworks is run.\" }"); - } else if (ProjectManager.singleton instanceof FileProjectManager) { - File dir = ((FileProjectManager) 
ProjectManager.singleton).getWorkspaceDir(); - - Runtime.getRuntime().exec( - "open .", - new String[] {}, - dir - ); - - respond(response, "{ \"code\" : \"ok\" }"); - } else { - respond(response, "{ \"code\" : \"error\", \"message\" : \"Workspace is not stored on the file system.\" }"); - } - } - -} diff --git a/main/src/com/google/gridworks/commands/SetPreferenceCommand.java b/main/src/com/google/gridworks/commands/SetPreferenceCommand.java deleted file mode 100644 index c380737e4..000000000 --- a/main/src/com/google/gridworks/commands/SetPreferenceCommand.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.google.gridworks.commands; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; -import org.json.JSONTokener; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.model.Project; -import com.google.gridworks.preference.PreferenceStore; - -public class SetPreferenceCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - Project project = request.getParameter("project") != null ? getProject(request) : null; - PreferenceStore ps = project != null ? - project.getMetadata().getPreferenceStore() : - ProjectManager.singleton.getPreferenceStore(); - - String prefName = request.getParameter("name"); - String valueString = request.getParameter("value"); - - try { - Object o = valueString == null ? 
null : new JSONTokener(valueString).nextValue(); - - ps.put(prefName, PreferenceStore.loadObject(o)); - - respond(response, "{ \"code\" : \"ok\" }"); - } catch (JSONException e) { - respondException(response, e); - } - } - -} diff --git a/main/src/com/google/gridworks/commands/auth/AuthorizeCommand.java b/main/src/com/google/gridworks/commands/auth/AuthorizeCommand.java deleted file mode 100644 index 7a23a4207..000000000 --- a/main/src/com/google/gridworks/commands/auth/AuthorizeCommand.java +++ /dev/null @@ -1,134 +0,0 @@ -package com.google.gridworks.commands.auth; - -import java.io.IOException; -import java.io.PrintWriter; -import java.net.URI; -import java.net.URISyntaxException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import oauth.signpost.OAuthConsumer; -import oauth.signpost.OAuthProvider; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.oauth.Credentials; -import com.google.gridworks.oauth.OAuthUtilities; -import com.google.gridworks.oauth.Provider; - -public class AuthorizeCommand extends Command { - - private static final String OAUTH_VERIFIER_PARAM = "oauth_verifier"; - - public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - - // get the provider from the request - Provider provider = OAuthUtilities.getProvider(request); - - try { - - // see if the request comes with access credentials - Credentials access_credentials = Credentials.getCredentials(request, provider, Credentials.Type.ACCESS); - - // prepare the continuation URL that the OAuth provider will redirect the user to - // (we need to make sure this URL points back to this code or the dance will never complete) - String callbackURL = getBaseURL(request,provider); - - if (access_credentials == null) { - // access credentials are not available so we need to check - // to see at what stage of the OAuth dance we 
are - - // get the request token credentials - Credentials request_credentials = Credentials.getCredentials(request, provider, Credentials.Type.REQUEST); - - OAuthConsumer consumer = OAuthUtilities.getConsumer(request_credentials, provider); - OAuthProvider pp = OAuthUtilities.getOAuthProvider(provider); - - if (request_credentials == null) { - // no credentials were found, so let's start the dance - - // get the request token - - String url = pp.retrieveRequestToken(consumer, callbackURL); - - request_credentials = new Credentials(consumer.getToken(), consumer.getTokenSecret(), provider); - - // and set them to that we can retrieve them later in the second part of the dance - Credentials.setCredentials(request, response, request_credentials, Credentials.Type.REQUEST, 3600); - - // now redirect the user to the Authorize URL where she can authenticate against the - // service provider and authorize us. - // The provider will bounce the user back here for us to continue the dance. - - response.sendRedirect(url); - } else { - // we are at the second stage of the dance, so we need need to obtain the access credentials now - - // if we got here, it means that the user performed a valid authentication against the - // service provider and authorized us, so now we can request more permanent credentials - // to the service provider and save those as well for later use. 
- - // this is set only for OAuth 1.0a - String verificationCode = request.getParameter(OAUTH_VERIFIER_PARAM); - - pp.retrieveAccessToken(consumer, verificationCode); - - access_credentials = new Credentials(consumer.getToken(), consumer.getTokenSecret(), provider); - - // no matter the result, we need to remove the request token - Credentials.deleteCredentials(request, response, provider, Credentials.Type.REQUEST); - - Credentials.setCredentials(request, response, access_credentials, Credentials.Type.ACCESS, 30 * 24 * 3600); - - finish(response); - } - } else { - finish(response); - } - } catch (Exception e) { - Credentials.deleteCredentials(request, response, provider, Credentials.Type.REQUEST); - Credentials.deleteCredentials(request, response, provider, Credentials.Type.ACCESS); - respondException(response, e); - } - } - - private void finish(HttpServletResponse response) throws IOException { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "text/html"); - - PrintWriter writer = response.getWriter(); - writer.write( - "" + - "" + - "" + - "" - ); - writer.flush(); - } - - private String getBaseURL(HttpServletRequest request, Provider provider) { - String host = request.getHeader("host"); - if (host == null) { - String referrer = request.getHeader("referer"); - if (referrer != null) { - URI url; - try { - url = new URI(referrer); - int port = url.getPort(); - host = url.getHost() + ((port > -1) ? 
":" + url.getPort() : ""); - } catch (URISyntaxException e) { - throw new RuntimeException("referrer '" + referrer + "' can't be parsed as a URL"); - } - } else { - throw new RuntimeException("neither the 'host' nor 'referer' headers were present in the HTTP response, I can't determine what URL gridworks is listening to."); - } - } - return "http://" + host + "/command/core/authorize/" + provider.getHost(); - } -} diff --git a/main/src/com/google/gridworks/commands/auth/CheckAuthorizationCommand.java b/main/src/com/google/gridworks/commands/auth/CheckAuthorizationCommand.java deleted file mode 100644 index c329faf81..000000000 --- a/main/src/com/google/gridworks/commands/auth/CheckAuthorizationCommand.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.google.gridworks.commands.auth; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.oauth.Credentials; -import com.google.gridworks.oauth.OAuthUtilities; -import com.google.gridworks.oauth.Provider; -import com.google.gridworks.util.FreebaseUtils; - -public class CheckAuthorizationCommand extends Command { - - final static Logger logger = LoggerFactory.getLogger("check-authorization_command"); - - public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - - try { - Provider provider = OAuthUtilities.getProvider(request); - - // this cookie should not be there, but this is good hygiene practice - Credentials.deleteCredentials(request, response, provider, Credentials.Type.REQUEST); - - Credentials access_credentials = Credentials.getCredentials(request, provider, Credentials.Type.ACCESS); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - if (access_credentials 
!= null) { - String user_info = FreebaseUtils.getUserInfo(access_credentials, provider); - response.getWriter().write(user_info); - } else { - respond(response, "401 Unauthorized", "You don't have the right credentials"); - } - } catch (Exception e) { - logger.info("error",e); - respondException(response, e); - } - } - -} diff --git a/main/src/com/google/gridworks/commands/auth/DeAuthorizeCommand.java b/main/src/com/google/gridworks/commands/auth/DeAuthorizeCommand.java deleted file mode 100644 index 16e5e8fbc..000000000 --- a/main/src/com/google/gridworks/commands/auth/DeAuthorizeCommand.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.google.gridworks.commands.auth; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.oauth.Credentials; -import com.google.gridworks.oauth.OAuthUtilities; -import com.google.gridworks.oauth.Provider; - -public class DeAuthorizeCommand extends Command { - - public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - - try { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - Provider provider = OAuthUtilities.getProvider(request); - - Credentials.deleteCredentials(request, response, provider, Credentials.Type.ACCESS); - - respond(response, "200 OK", ""); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/auth/GetUserBadgesCommand.java b/main/src/com/google/gridworks/commands/auth/GetUserBadgesCommand.java deleted file mode 100644 index 460beda46..000000000 --- a/main/src/com/google/gridworks/commands/auth/GetUserBadgesCommand.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.google.gridworks.commands.auth; - -import java.io.IOException; - -import 
javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.oauth.OAuthUtilities; -import com.google.gridworks.oauth.Provider; -import com.google.gridworks.util.FreebaseUtils; - -public class GetUserBadgesCommand extends Command { - - final static Logger logger = LoggerFactory.getLogger("check-authorization_command"); - - public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - - try { - Provider provider = OAuthUtilities.getProvider(request); - String user_id = request.getParameter("user_id"); - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - String user_badges = FreebaseUtils.getUserBadges(provider, user_id); - response.getWriter().write(user_badges); - } catch (Exception e) { - logger.info("error",e); - respondException(response, e); - } - } - -} diff --git a/main/src/com/google/gridworks/commands/browsing/ComputeClustersCommand.java b/main/src/com/google/gridworks/commands/browsing/ComputeClustersCommand.java deleted file mode 100644 index 8feb8aee0..000000000 --- a/main/src/com/google/gridworks/commands/browsing/ComputeClustersCommand.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.google.gridworks.commands.browsing; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.clustering.Clusterer; -import com.google.gridworks.clustering.binning.BinningClusterer; -import com.google.gridworks.clustering.knn.kNNClusterer; -import com.google.gridworks.commands.Command; -import 
com.google.gridworks.model.Project; - -public class ComputeClustersCommand extends Command { - - final static Logger logger = LoggerFactory.getLogger("compute-clusters_command"); - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - long start = System.currentTimeMillis(); - Project project = getProject(request); - Engine engine = getEngine(request, project); - JSONObject clusterer_conf = getJsonParameter(request,"clusterer"); - - Clusterer clusterer = null; - String type = clusterer_conf.has("type") ? clusterer_conf.getString("type") : "binning"; - - if ("knn".equals(type)) { - clusterer = new kNNClusterer(); - } else { - clusterer = new BinningClusterer(); - } - - clusterer.initializeFromJSON(project, clusterer_conf); - - clusterer.computeClusters(engine); - - respondJSON(response, clusterer); - logger.info("computed clusters [{},{}] in {}ms", new Object[] { type, clusterer_conf.getString("function"), Long.toString(System.currentTimeMillis() - start) }); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/browsing/ComputeFacetsCommand.java b/main/src/com/google/gridworks/commands/browsing/ComputeFacetsCommand.java deleted file mode 100644 index 36ece8868..000000000 --- a/main/src/com/google/gridworks/commands/browsing/ComputeFacetsCommand.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.google.gridworks.commands.browsing; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.Project; - -public class ComputeFacetsCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException 
{ - - try { - Project project = getProject(request); - Engine engine = getEngine(request, project); - - engine.computeFacets(); - - respondJSON(response, engine); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/browsing/GetScatterplotCommand.java b/main/src/com/google/gridworks/commands/browsing/GetScatterplotCommand.java deleted file mode 100644 index 10dbcd167..000000000 --- a/main/src/com/google/gridworks/commands/browsing/GetScatterplotCommand.java +++ /dev/null @@ -1,175 +0,0 @@ -package com.google.gridworks.commands.browsing; - -import java.awt.Color; -import java.awt.image.BufferedImage; -import java.io.IOException; -import java.io.OutputStream; - -import javax.imageio.ImageIO; -import javax.servlet.ServletException; -import javax.servlet.ServletOutputStream; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; -import org.json.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.facets.ScatterplotDrawingRowVisitor; -import com.google.gridworks.browsing.facets.ScatterplotFacet; -import com.google.gridworks.browsing.util.NumericBinIndex; -import com.google.gridworks.commands.Command; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.ParsingException; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; - -public class GetScatterplotCommand extends Command { - - final static Logger logger = LoggerFactory.getLogger("get-scatterplot_command"); - - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - long start = System.currentTimeMillis(); - - Project project = 
getProject(request); - Engine engine = getEngine(request, project); - JSONObject conf = getJsonParameter(request,"plotter"); - - response.setHeader("Content-Type", "image/png"); - - ServletOutputStream sos = null; - - try { - sos = response.getOutputStream(); - draw(sos, project, engine, conf); - } finally { - sos.close(); - } - - logger.trace("Drawn scatterplot in {} ms", Long.toString(System.currentTimeMillis() - start)); - } catch (Exception e) { - e.printStackTrace(); - respondException(response, e); - } - } - - public void draw(OutputStream output, Project project, Engine engine, JSONObject o) throws IOException, JSONException { - - double min_x = 0; - double min_y = 0; - double max_x = 0; - double max_y = 0; - - int columnIndex_x = 0; - int columnIndex_y = 0; - - Evaluable eval_x = null; - Evaluable eval_y = null; - - int size = (o.has(ScatterplotFacet.SIZE)) ? o.getInt(ScatterplotFacet.SIZE) : 100; - - double dot = (o.has(ScatterplotFacet.DOT)) ? o.getDouble(ScatterplotFacet.DOT) : 100; - - int dim_x = (o.has(ScatterplotFacet.DIM_X)) ? ScatterplotFacet.getAxisDim(o.getString(ScatterplotFacet.DIM_X)) : ScatterplotFacet.LIN; - int dim_y = (o.has(ScatterplotFacet.DIM_Y)) ? ScatterplotFacet.getAxisDim(o.getString(ScatterplotFacet.DIM_Y)) : ScatterplotFacet.LIN; - - int rotation = (o.has(ScatterplotFacet.ROTATION)) ? ScatterplotFacet.getRotation(o.getString(ScatterplotFacet.ROTATION)) : ScatterplotFacet.NO_ROTATION; - - String color_str = (o.has(ScatterplotFacet.COLOR)) ? o.getString(ScatterplotFacet.COLOR) : "000000"; - Color color = new Color(Integer.parseInt(color_str,16)); - - String base_color_str = (o.has(ScatterplotFacet.BASE_COLOR)) ? o.getString(ScatterplotFacet.BASE_COLOR) : null; - Color base_color = base_color_str != null ? new Color(Integer.parseInt(base_color_str,16)) : null; - - String columnName_x = o.getString(ScatterplotFacet.X_COLUMN_NAME); - String expression_x = (o.has(ScatterplotFacet.X_EXPRESSION)) ? 
o.getString(ScatterplotFacet.X_EXPRESSION) : "value"; - - if (columnName_x.length() > 0) { - Column x_column = project.columnModel.getColumnByName(columnName_x); - if (x_column != null) { - columnIndex_x = x_column.getCellIndex(); - } - } else { - columnIndex_x = -1; - } - - try { - eval_x = MetaParser.parse(expression_x); - } catch (ParsingException e) { - logger.warn("error parsing expression", e); - } - - String columnName_y = o.getString(ScatterplotFacet.Y_COLUMN_NAME); - String expression_y = (o.has(ScatterplotFacet.Y_EXPRESSION)) ? o.getString(ScatterplotFacet.Y_EXPRESSION) : "value"; - - if (columnName_y.length() > 0) { - Column y_column = project.columnModel.getColumnByName(columnName_y); - if (y_column != null) { - columnIndex_y = y_column.getCellIndex(); - } - } else { - columnIndex_y = -1; - } - - try { - eval_y = MetaParser.parse(expression_y); - } catch (ParsingException e) { - logger.warn("error parsing expression", e); - } - - NumericBinIndex index_x = null; - NumericBinIndex index_y = null; - - String col_x_name = o.getString(ScatterplotFacet.X_COLUMN_NAME); - Column column_x = project.columnModel.getColumnByName(col_x_name); - if (column_x != null) { - columnIndex_x = column_x.getCellIndex(); - index_x = ScatterplotFacet.getBinIndex(project, column_x, eval_x, expression_x); - min_x = index_x.getMin(); - max_x = index_x.getMax(); - } - - String col_y_name = o.getString(ScatterplotFacet.Y_COLUMN_NAME); - Column column_y = project.columnModel.getColumnByName(col_y_name); - if (column_y != null) { - columnIndex_y = column_y.getCellIndex(); - index_y = ScatterplotFacet.getBinIndex(project, column_y, eval_y, expression_y); - min_y = index_y.getMin(); - max_y = index_y.getMax(); - } - - if (index_x != null && index_y != null && index_x.isNumeric() && index_y.isNumeric()) { - ScatterplotDrawingRowVisitor drawer = new ScatterplotDrawingRowVisitor( - columnIndex_x, columnIndex_y, min_x, max_x, min_y, max_y, - size, dim_x, dim_y, rotation, dot, color - ); - - 
if (base_color != null) { - drawer.setColor(base_color); - - FilteredRows filteredRows = engine.getAllRows(); - filteredRows.accept(project, drawer); - - drawer.setColor(color); - } - - { - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, drawer); - } - - ImageIO.write(drawer.getImage(), "png", output); - } else { - ImageIO.write(new BufferedImage(1, 1, BufferedImage.TYPE_4BYTE_ABGR), "png", output); - } - - } - -} diff --git a/main/src/com/google/gridworks/commands/cell/BlankDownCommand.java b/main/src/com/google/gridworks/commands/cell/BlankDownCommand.java deleted file mode 100644 index 84dc14ad1..000000000 --- a/main/src/com/google/gridworks/commands/cell/BlankDownCommand.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.google.gridworks.commands.cell; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.cell.BlankDownOperation; - -public class BlankDownCommand extends EngineDependentCommand { - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String columnName = request.getParameter("columnName"); - - return new BlankDownOperation( - engineConfig, - columnName - ); - } -} diff --git a/main/src/com/google/gridworks/commands/cell/EditOneCellCommand.java b/main/src/com/google/gridworks/commands/cell/EditOneCellCommand.java deleted file mode 100644 index 629cfff5b..000000000 --- a/main/src/com/google/gridworks/commands/cell/EditOneCellCommand.java +++ /dev/null @@ -1,128 +0,0 @@ -package com.google.gridworks.commands.cell; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; 
-import javax.servlet.http.HttpServletResponse; - -import org.json.JSONWriter; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.process.QuickHistoryEntryProcess; -import com.google.gridworks.util.ParsingUtilities; -import com.google.gridworks.util.Pool; - -public class EditOneCellCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - request.setCharacterEncoding("UTF-8"); - response.setCharacterEncoding("UTF-8"); - - Project project = getProject(request); - - int rowIndex = Integer.parseInt(request.getParameter("row")); - int cellIndex = Integer.parseInt(request.getParameter("cell")); - - String type = request.getParameter("type"); - String valueString = request.getParameter("value"); - Serializable value = null; - - if ("number".equals(type)) { - value = Double.parseDouble(valueString); - } else if ("boolean".equals(type)) { - value = "true".equalsIgnoreCase(valueString); - } else if ("date".equals(type)) { - value = ParsingUtilities.stringToDate(valueString); - } else { - value = valueString; - } - - EditOneCellProcess process = new EditOneCellProcess( - project, - "Edit single cell", - rowIndex, - cellIndex, - value - ); - - HistoryEntry historyEntry = project.processManager.queueProcess(process); - if (historyEntry != null) { - /* - * If the operation has been done, return the new cell's data - * so the client side can update the cell's rendering right away. 
- */ - JSONWriter writer = new JSONWriter(response.getWriter()); - - Pool pool = new Pool(); - Properties options = new Properties(); - options.put("pool", pool); - - writer.object(); - writer.key("code"); writer.value("ok"); - writer.key("historyEntry"); historyEntry.write(writer, options); - writer.key("cell"); process.newCell.write(writer, options); - writer.key("pool"); pool.write(writer, options); - writer.endObject(); - } else { - respond(response, "{ \"code\" : \"pending\" }"); - } - } catch (Exception e) { - respondException(response, e); - } - } - - protected static class EditOneCellProcess extends QuickHistoryEntryProcess { - final int rowIndex; - final int cellIndex; - final Serializable value; - Cell newCell; - - EditOneCellProcess( - Project project, - String briefDescription, - int rowIndex, - int cellIndex, - Serializable value - ) { - super(project, briefDescription); - - this.rowIndex = rowIndex; - this.cellIndex = cellIndex; - this.value = value; - } - - protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception { - Cell cell = _project.rows.get(rowIndex).getCell(cellIndex); - Column column = _project.columnModel.getColumnByCellIndex(cellIndex); - if (column == null) { - throw new Exception("No such column"); - } - - newCell = new Cell( - value, - cell != null ? 
cell.recon : null - ); - - String description = - "Edit single cell on row " + (rowIndex + 1) + - ", column " + column.getName(); - - Change change = new CellChange(rowIndex, cellIndex, cell, newCell); - - return new HistoryEntry( - historyEntryID, _project, description, null, change); - } - } -} diff --git a/main/src/com/google/gridworks/commands/cell/FillDownCommand.java b/main/src/com/google/gridworks/commands/cell/FillDownCommand.java deleted file mode 100644 index e0fd72995..000000000 --- a/main/src/com/google/gridworks/commands/cell/FillDownCommand.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.google.gridworks.commands.cell; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.cell.FillDownOperation; - -public class FillDownCommand extends EngineDependentCommand { - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String columnName = request.getParameter("columnName"); - - return new FillDownOperation( - engineConfig, - columnName - ); - } -} diff --git a/main/src/com/google/gridworks/commands/cell/JoinMultiValueCellsCommand.java b/main/src/com/google/gridworks/commands/cell/JoinMultiValueCellsCommand.java deleted file mode 100644 index 7b9e551bb..000000000 --- a/main/src/com/google/gridworks/commands/cell/JoinMultiValueCellsCommand.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.google.gridworks.commands.cell; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.AbstractOperation; -import 
com.google.gridworks.model.Project; -import com.google.gridworks.operations.cell.MultiValuedCellJoinOperation; -import com.google.gridworks.process.Process; - -public class JoinMultiValueCellsCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - - String columnName = request.getParameter("columnName"); - String keyColumnName = request.getParameter("keyColumnName"); - String separator = request.getParameter("separator"); - - AbstractOperation op = new MultiValuedCellJoinOperation(columnName, keyColumnName, separator); - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/cell/MassEditCommand.java b/main/src/com/google/gridworks/commands/cell/MassEditCommand.java deleted file mode 100644 index 6f01e3528..000000000 --- a/main/src/com/google/gridworks/commands/cell/MassEditCommand.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.google.gridworks.commands.cell; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.cell.MassEditOperation; -import com.google.gridworks.util.ParsingUtilities; - -public class MassEditCommand extends EngineDependentCommand { - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String columnName = request.getParameter("columnName"); - String expression = request.getParameter("expression"); - String editsString = request.getParameter("edits"); - - return new 
MassEditOperation( - engineConfig, - columnName, - expression, - MassEditOperation.reconstructEdits(ParsingUtilities.evaluateJsonStringToArray(editsString)) - ); - } -} diff --git a/main/src/com/google/gridworks/commands/cell/SplitMultiValueCellsCommand.java b/main/src/com/google/gridworks/commands/cell/SplitMultiValueCellsCommand.java deleted file mode 100644 index 2bcdceb97..000000000 --- a/main/src/com/google/gridworks/commands/cell/SplitMultiValueCellsCommand.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.google.gridworks.commands.cell; - - import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.cell.MultiValuedCellSplitOperation; -import com.google.gridworks.process.Process; - -public class SplitMultiValueCellsCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - - String columnName = request.getParameter("columnName"); - String keyColumnName = request.getParameter("keyColumnName"); - String separator = request.getParameter("separator"); - String mode = request.getParameter("mode"); - - AbstractOperation op = new MultiValuedCellSplitOperation(columnName, keyColumnName, separator, mode); - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/cell/TextTransformCommand.java b/main/src/com/google/gridworks/commands/cell/TextTransformCommand.java deleted file mode 100644 index 
46dab6542..000000000 --- a/main/src/com/google/gridworks/commands/cell/TextTransformCommand.java +++ /dev/null @@ -1,38 +0,0 @@ -package com.google.gridworks.commands.cell; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.cell.TextTransformOperation; - -public class TextTransformCommand extends EngineDependentCommand { - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String columnName = request.getParameter("columnName"); - String expression = request.getParameter("expression"); - String onError = request.getParameter("onError"); - boolean repeat = "true".equals(request.getParameter("repeat")); - - int repeatCount = 10; - String repeatCountString = request.getParameter("repeatCount"); - try { - repeatCount = Math.max(Math.min(Integer.parseInt(repeatCountString), 10), 0); - } catch (Exception e) { - } - - return new TextTransformOperation( - engineConfig, - columnName, - expression, - TextTransformOperation.stringToOnError(onError), - repeat, - repeatCount - ); - } -} diff --git a/main/src/com/google/gridworks/commands/cell/TransposeColumnsIntoRowsCommand.java b/main/src/com/google/gridworks/commands/cell/TransposeColumnsIntoRowsCommand.java deleted file mode 100644 index 6c26b2b1a..000000000 --- a/main/src/com/google/gridworks/commands/cell/TransposeColumnsIntoRowsCommand.java +++ /dev/null @@ -1,42 +0,0 @@ -package com.google.gridworks.commands.cell; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import 
com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.cell.TransposeColumnsIntoRowsOperation; -import com.google.gridworks.process.Process; - -public class TransposeColumnsIntoRowsCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - - String startColumnName = request.getParameter("startColumnName"); - int columnCount = Integer.parseInt(request.getParameter("columnCount")); - String combinedColumnName = request.getParameter("combinedColumnName"); - - boolean prependColumnName = Boolean.parseBoolean(request.getParameter("prependColumnName")); - String separator = request.getParameter("separator"); - boolean ignoreBlankCells = Boolean.parseBoolean(request.getParameter("ignoreBlankCells")); - - AbstractOperation op = new TransposeColumnsIntoRowsOperation( - startColumnName, columnCount, combinedColumnName, prependColumnName, separator, ignoreBlankCells); - - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/cell/TransposeRowsIntoColumnsCommand.java b/main/src/com/google/gridworks/commands/cell/TransposeRowsIntoColumnsCommand.java deleted file mode 100644 index 6e3916786..000000000 --- a/main/src/com/google/gridworks/commands/cell/TransposeRowsIntoColumnsCommand.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.google.gridworks.commands.cell; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.AbstractOperation; 
-import com.google.gridworks.model.Project; -import com.google.gridworks.operations.cell.TransposeRowsIntoColumnsOperation; -import com.google.gridworks.process.Process; - -public class TransposeRowsIntoColumnsCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - - String columnName = request.getParameter("columnName"); - int rowCount = Integer.parseInt(request.getParameter("rowCount")); - - AbstractOperation op = new TransposeRowsIntoColumnsOperation( - columnName, rowCount); - - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/column/AddColumnByFetchingURLsCommand.java b/main/src/com/google/gridworks/commands/column/AddColumnByFetchingURLsCommand.java deleted file mode 100644 index b78e01b49..000000000 --- a/main/src/com/google/gridworks/commands/column/AddColumnByFetchingURLsCommand.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.google.gridworks.commands.column; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.cell.TextTransformOperation; -import com.google.gridworks.operations.column.ColumnAdditionByFetchingURLsOperation; - -public class AddColumnByFetchingURLsCommand extends EngineDependentCommand { - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String baseColumnName = request.getParameter("baseColumnName"); - String urlExpression = request.getParameter("urlExpression"); - 
String newColumnName = request.getParameter("newColumnName"); - int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); - int delay = Integer.parseInt(request.getParameter("delay")); - String onError = request.getParameter("onError"); - - return new ColumnAdditionByFetchingURLsOperation( - engineConfig, - baseColumnName, - urlExpression, - TextTransformOperation.stringToOnError(onError), - newColumnName, - columnInsertIndex, - delay - ); - } - -} diff --git a/main/src/com/google/gridworks/commands/column/AddColumnCommand.java b/main/src/com/google/gridworks/commands/column/AddColumnCommand.java deleted file mode 100644 index c200c9855..000000000 --- a/main/src/com/google/gridworks/commands/column/AddColumnCommand.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.google.gridworks.commands.column; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.cell.TextTransformOperation; -import com.google.gridworks.operations.column.ColumnAdditionOperation; - -public class AddColumnCommand extends EngineDependentCommand { - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String baseColumnName = request.getParameter("baseColumnName"); - String expression = request.getParameter("expression"); - String newColumnName = request.getParameter("newColumnName"); - int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); - String onError = request.getParameter("onError"); - - return new ColumnAdditionOperation( - engineConfig, - baseColumnName, - expression, - TextTransformOperation.stringToOnError(onError), - newColumnName, - columnInsertIndex - ); - } - -} diff --git 
a/main/src/com/google/gridworks/commands/column/ExtendDataCommand.java b/main/src/com/google/gridworks/commands/column/ExtendDataCommand.java deleted file mode 100644 index c17091776..000000000 --- a/main/src/com/google/gridworks/commands/column/ExtendDataCommand.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.google.gridworks.commands.column; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.column.ExtendDataOperation; -import com.google.gridworks.util.ParsingUtilities; - -public class ExtendDataCommand extends EngineDependentCommand { - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String baseColumnName = request.getParameter("baseColumnName"); - int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); - - String jsonString = request.getParameter("extension"); - JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString); - - return new ExtendDataOperation( - engineConfig, - baseColumnName, - extension, - columnInsertIndex - ); - } - -} diff --git a/main/src/com/google/gridworks/commands/column/GetColumnsInfoCommand.java b/main/src/com/google/gridworks/commands/column/GetColumnsInfoCommand.java deleted file mode 100644 index 42844c59f..000000000 --- a/main/src/com/google/gridworks/commands/column/GetColumnsInfoCommand.java +++ /dev/null @@ -1,99 +0,0 @@ -package com.google.gridworks.commands.column; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; -import org.json.JSONWriter; - -import 
com.google.gridworks.browsing.util.ExpressionBasedRowEvaluable; -import com.google.gridworks.browsing.util.NumericBinIndex; -import com.google.gridworks.browsing.util.NumericBinRowIndex; -import com.google.gridworks.commands.Command; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.ParsingException; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; - -public class GetColumnsInfoCommand extends Command { - - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - //long start = System.currentTimeMillis(); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - Project project = getProject(request); - //Engine engine = getEngine(request, project); - - JSONWriter writer = new JSONWriter(response.getWriter()); - - writer.array(); - for (Column column : project.columnModel.columns) { - writer.object(); - write(project, column, writer); - writer.endObject(); - } - writer.endArray(); - - //Gridworks.log("Obtained columns info in " + (System.currentTimeMillis() - start) + "ms"); - } catch (Exception e) { - e.printStackTrace(); - respondException(response, e); - } - } - - private NumericBinIndex getBinIndex(Project project, Column column) { - String expression = "value"; - String key = "numeric-bin:" + expression; - Evaluable eval = null; - try { - eval = MetaParser.parse(expression); - } catch (ParsingException e) { - // this should never happen - } - NumericBinIndex index = (NumericBinIndex) column.getPrecompute(key); - if (index == null) { - index = new NumericBinRowIndex(project, new ExpressionBasedRowEvaluable(column.getName(), column.getCellIndex(), eval)); - column.setPrecompute(key, index); - } - return index; - } - - private void write(Project project, Column column, JSONWriter writer) throws JSONException { - NumericBinIndex 
columnIndex = getBinIndex(project, column); - if (columnIndex != null) { - writer.key("name"); - writer.value(column.getName()); - boolean is_numeric = columnIndex.isNumeric(); - writer.key("is_numeric"); - writer.value(is_numeric); - writer.key("numeric_row_count"); - writer.value(columnIndex.getNumericRowCount()); - writer.key("non_numeric_row_count"); - writer.value(columnIndex.getNonNumericRowCount()); - writer.key("error_row_count"); - writer.value(columnIndex.getErrorRowCount()); - writer.key("blank_row_count"); - writer.value(columnIndex.getBlankRowCount()); - if (is_numeric) { - writer.key("min"); - writer.value(columnIndex.getMin()); - writer.key("max"); - writer.value(columnIndex.getMax()); - writer.key("step"); - writer.value(columnIndex.getStep()); - } - } else { - writer.key("error"); - writer.value("error finding numeric information on the '" + column.getName() + "' column"); - } - } -} diff --git a/main/src/com/google/gridworks/commands/column/MoveColumnCommand.java b/main/src/com/google/gridworks/commands/column/MoveColumnCommand.java deleted file mode 100644 index 430d74d75..000000000 --- a/main/src/com/google/gridworks/commands/column/MoveColumnCommand.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.google.gridworks.commands.column; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.column.ColumnMoveOperation; -import com.google.gridworks.process.Process; - -public class MoveColumnCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - - String columnName = 
request.getParameter("columnName"); - int index = Integer.parseInt(request.getParameter("index")); - - AbstractOperation op = new ColumnMoveOperation(columnName, index); - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/column/PreviewExtendDataCommand.java b/main/src/com/google/gridworks/commands/column/PreviewExtendDataCommand.java deleted file mode 100644 index 05a0f5afb..000000000 --- a/main/src/com/google/gridworks/commands/column/PreviewExtendDataCommand.java +++ /dev/null @@ -1,158 +0,0 @@ -package com.google.gridworks.commands.column; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONArray; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.FreebaseDataExtensionJob; -import com.google.gridworks.util.ParsingUtilities; -import com.google.gridworks.util.FreebaseDataExtensionJob.ColumnInfo; -import com.google.gridworks.util.FreebaseDataExtensionJob.DataExtension; - -public class PreviewExtendDataCommand extends Command { - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - String columnName = request.getParameter("columnName"); - - String rowIndicesString = 
request.getParameter("rowIndices"); - if (rowIndicesString == null) { - respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }"); - return; - } - - String jsonString = request.getParameter("extension"); - JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString); - - JSONArray rowIndices = ParsingUtilities.evaluateJsonStringToArray(rowIndicesString); - int length = rowIndices.length(); - int cellIndex = project.columnModel.getColumnByName(columnName).getCellIndex(); - - List topicNames = new ArrayList(); - List topicIds = new ArrayList(); - Set ids = new HashSet(); - for (int i = 0; i < length; i++) { - int rowIndex = rowIndices.getInt(i); - if (rowIndex >= 0 && rowIndex < project.rows.size()) { - Row row = project.rows.get(rowIndex); - Cell cell = row.getCell(cellIndex); - if (cell != null && cell.recon != null && cell.recon.match != null) { - topicNames.add(cell.recon.match.name); - topicIds.add(cell.recon.match.id); - ids.add(cell.recon.match.id); - } else { - topicNames.add(null); - topicIds.add(null); - ids.add(null); - } - } - } - - Map reconCandidateMap = new HashMap(); - FreebaseDataExtensionJob job = new FreebaseDataExtensionJob(json); - Map map = job.extend(ids, reconCandidateMap); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - JSONWriter writer = new JSONWriter(response.getWriter()); - writer.object(); - writer.key("code"); writer.value("ok"); - writer.key("columns"); - writer.array(); - for (ColumnInfo info : job.columns) { - writer.object(); - writer.key("names"); - writer.array(); - for (String name : info.names) { - writer.value(name); - } - writer.endArray(); - writer.key("path"); - writer.array(); - for (String id : info.path) { - writer.value(id); - } - writer.endArray(); - writer.endObject(); - } - writer.endArray(); - - writer.key("rows"); - writer.array(); - for (int r = 0; r < topicNames.size(); r++) { - String id = topicIds.get(r); - 
String topicName = topicNames.get(r); - - if (id != null && map.containsKey(id)) { - DataExtension ext = map.get(id); - boolean first = true; - - if (ext.data.length > 0) { - for (Object[] row : ext.data) { - writer.array(); - if (first) { - writer.value(topicName); - first = false; - } else { - writer.value(null); - } - - for (Object cell : row) { - if (cell != null && cell instanceof ReconCandidate) { - ReconCandidate rc = (ReconCandidate) cell; - writer.object(); - writer.key("id"); writer.value(rc.id); - writer.key("name"); writer.value(rc.name); - writer.endObject(); - } else { - writer.value(cell); - } - } - - writer.endArray(); - } - continue; - } - } - - writer.array(); - if (id != null) { - writer.object(); - writer.key("id"); writer.value(id); - writer.key("name"); writer.value(topicName); - writer.endObject(); - } else { - writer.value(""); - } - writer.endArray(); - } - writer.endArray(); - - writer.endObject(); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/column/RemoveColumnCommand.java b/main/src/com/google/gridworks/commands/column/RemoveColumnCommand.java deleted file mode 100644 index 9477f18f6..000000000 --- a/main/src/com/google/gridworks/commands/column/RemoveColumnCommand.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.google.gridworks.commands.column; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.column.ColumnRemovalOperation; -import com.google.gridworks.process.Process; - -public class RemoveColumnCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws 
ServletException, IOException { - - try { - Project project = getProject(request); - - String columnName = request.getParameter("columnName"); - - AbstractOperation op = new ColumnRemovalOperation(columnName); - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/column/RenameColumnCommand.java b/main/src/com/google/gridworks/commands/column/RenameColumnCommand.java deleted file mode 100644 index ed2307210..000000000 --- a/main/src/com/google/gridworks/commands/column/RenameColumnCommand.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.google.gridworks.commands.column; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.column.ColumnRenameOperation; -import com.google.gridworks.process.Process; - -public class RenameColumnCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - - String oldColumnName = request.getParameter("oldColumnName"); - String newColumnName = request.getParameter("newColumnName"); - - AbstractOperation op = new ColumnRenameOperation(oldColumnName, newColumnName); - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/column/ReorderColumnsCommand.java 
b/main/src/com/google/gridworks/commands/column/ReorderColumnsCommand.java deleted file mode 100644 index 2a1a7e55c..000000000 --- a/main/src/com/google/gridworks/commands/column/ReorderColumnsCommand.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.google.gridworks.commands.column; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.column.ColumnReorderOperation; -import com.google.gridworks.util.JSONUtilities; -import com.google.gridworks.util.ParsingUtilities; - -public class ReorderColumnsCommand extends EngineDependentCommand { - - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String columnNames = request.getParameter("columnNames"); - return new ColumnReorderOperation( - JSONUtilities.toStringList( - ParsingUtilities.evaluateJsonStringToArray(columnNames))); - } -} diff --git a/main/src/com/google/gridworks/commands/column/SplitColumnCommand.java b/main/src/com/google/gridworks/commands/column/SplitColumnCommand.java deleted file mode 100644 index ca98fba35..000000000 --- a/main/src/com/google/gridworks/commands/column/SplitColumnCommand.java +++ /dev/null @@ -1,54 +0,0 @@ -package com.google.gridworks.commands.column; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONArray; -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.column.ColumnSplitOperation; -import com.google.gridworks.util.ParsingUtilities; - -public class SplitColumnCommand extends EngineDependentCommand { - @Override - protected AbstractOperation 
createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String columnName = request.getParameter("columnName"); - boolean guessCellType = Boolean.parseBoolean(request.getParameter("guessCellType")); - boolean removeOriginalColumn = Boolean.parseBoolean(request.getParameter("removeOriginalColumn")); - String mode = request.getParameter("mode"); - if ("separator".equals(mode)) { - String maxColumns = request.getParameter("maxColumns"); - - return new ColumnSplitOperation( - engineConfig, - columnName, - guessCellType, - removeOriginalColumn, - request.getParameter("separator"), - Boolean.parseBoolean(request.getParameter("regex")), - maxColumns != null && maxColumns.length() > 0 ? Integer.parseInt(maxColumns) : 0 - ); - } else { - String s = request.getParameter("fieldLengths"); - - JSONArray a = ParsingUtilities.evaluateJsonStringToArray(s); - int[] fieldLengths = new int[a.length()]; - - for (int i = 0; i < fieldLengths.length; i++) { - fieldLengths[i] = a.getInt(i); - } - - return new ColumnSplitOperation( - engineConfig, - columnName, - guessCellType, - removeOriginalColumn, - fieldLengths - ); - } - } -} diff --git a/main/src/com/google/gridworks/commands/expr/GetExpressionHistoryCommand.java b/main/src/com/google/gridworks/commands/expr/GetExpressionHistoryCommand.java deleted file mode 100644 index d2adc9f47..000000000 --- a/main/src/com/google/gridworks/commands/expr/GetExpressionHistoryCommand.java +++ /dev/null @@ -1,68 +0,0 @@ -package com.google.gridworks.commands.expr; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONWriter; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.Project; 
-import com.google.gridworks.preference.TopList; - -public class GetExpressionHistoryCommand extends Command { - - static protected List toExpressionList(Object o) { - return o == null ? new ArrayList() : ((TopList) o).getList(); - } - - @Override - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - - List localExpressions = toExpressionList(project.getMetadata().getPreferenceStore().get("scripting.expressions")); - localExpressions = localExpressions.size() > 20 ? localExpressions.subList(0, 20) : localExpressions; - - List globalExpressions = toExpressionList(ProjectManager.singleton.getPreferenceStore().get("scripting.expressions")); - - Set done = new HashSet(); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - JSONWriter writer = new JSONWriter(response.getWriter()); - writer.object(); - writer.key("expressions"); - writer.array(); - for (String s : localExpressions) { - writer.object(); - writer.key("code"); writer.value(s); - writer.key("global"); writer.value(false); - writer.endObject(); - done.add(s); - } - for (String s : globalExpressions) { - if (!done.contains(s)) { - writer.object(); - writer.key("code"); writer.value(s); - writer.key("global"); writer.value(true); - writer.endObject(); - } - } - writer.endArray(); - writer.endObject(); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/expr/GetExpressionLanguageInfoCommand.java b/main/src/com/google/gridworks/commands/expr/GetExpressionLanguageInfoCommand.java deleted file mode 100644 index 50b8e96c9..000000000 --- a/main/src/com/google/gridworks/commands/expr/GetExpressionLanguageInfoCommand.java +++ /dev/null @@ -1,58 +0,0 @@ -package com.google.gridworks.commands.expr; - -import java.io.IOException; -import java.util.Properties; -import 
java.util.Map.Entry; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONWriter; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.gel.Control; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class GetExpressionLanguageInfoCommand extends Command { - - @Override - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - JSONWriter writer = new JSONWriter(response.getWriter()); - Properties options = new Properties(); - - writer.object(); - - writer.key("functions"); - writer.object(); - { - for (Entry entry : ControlFunctionRegistry.getFunctionMapping()) { - writer.key(entry.getKey()); - entry.getValue().write(writer, options); - } - } - writer.endObject(); - - writer.key("controls"); - writer.object(); - { - for (Entry entry : ControlFunctionRegistry.getControlMapping()) { - writer.key(entry.getKey()); - entry.getValue().write(writer, options); - } - } - writer.endObject(); - - writer.endObject(); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/expr/LogExpressionCommand.java b/main/src/com/google/gridworks/commands/expr/LogExpressionCommand.java deleted file mode 100644 index bccfae379..000000000 --- a/main/src/com/google/gridworks/commands/expr/LogExpressionCommand.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.google.gridworks.commands.expr; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.commands.Command; -import 
com.google.gridworks.model.Project; -import com.google.gridworks.preference.TopList; - -public class LogExpressionCommand extends Command { - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - String expression = request.getParameter("expression"); - - ((TopList) project.getMetadata().getPreferenceStore().get("scripting.expressions")) - .add(expression); - - ((TopList) ProjectManager.singleton.getPreferenceStore().get("scripting.expressions")) - .add(expression); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - response.getWriter().write("{ \"code\" : \"ok\" }"); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/expr/PreviewExpressionCommand.java b/main/src/com/google/gridworks/commands/expr/PreviewExpressionCommand.java deleted file mode 100644 index 3aca66301..000000000 --- a/main/src/com/google/gridworks/commands/expr/PreviewExpressionCommand.java +++ /dev/null @@ -1,205 +0,0 @@ -package com.google.gridworks.commands.expr; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Calendar; -import java.util.Date; -import java.util.List; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.expr.HasFields; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.ParsingException; -import 
com.google.gridworks.expr.WrappedCell; -import com.google.gridworks.expr.WrappedRow; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.ParsingUtilities; - -public class PreviewExpressionCommand extends Command { - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - - int cellIndex = Integer.parseInt(request.getParameter("cellIndex")); - String columnName = cellIndex < 0 ? "" : project.columnModel.getColumnByCellIndex(cellIndex).getName(); - - String expression = request.getParameter("expression"); - String rowIndicesString = request.getParameter("rowIndices"); - if (rowIndicesString == null) { - respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }"); - return; - } - - boolean repeat = "true".equals(request.getParameter("repeat")); - int repeatCount = 10; - if (repeat) { - String repeatCountString = request.getParameter("repeatCount"); - try { - repeatCount = Math.max(Math.min(Integer.parseInt(repeatCountString), 10), 0); - } catch (Exception e) { - } - } - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - JSONArray rowIndices = ParsingUtilities.evaluateJsonStringToArray(rowIndicesString); - int length = rowIndices.length(); - - JSONWriter writer = new JSONWriter(response.getWriter()); - writer.object(); - - try { - Evaluable eval = MetaParser.parse(expression); - - writer.key("code"); writer.value("ok"); - writer.key("results"); writer.array(); - - Properties bindings = ExpressionUtils.createBindings(project); - for (int i = 0; i < length; i++) { - Object result = null; - - int rowIndex = rowIndices.getInt(i); - if (rowIndex >= 0 && rowIndex < project.rows.size()) { - Row row = project.rows.get(rowIndex); - Cell cell = 
row.getCell(cellIndex); - - try { - ExpressionUtils.bind(bindings, row, rowIndex, columnName, cell); - result = eval.evaluate(bindings); - - if (repeat) { - for (int r = 0; r < repeatCount && ExpressionUtils.isStorable(result); r++) { - Cell newCell = new Cell((Serializable) result, (cell != null) ? cell.recon : null); - ExpressionUtils.bind(bindings, row, rowIndex, columnName, newCell); - - Object newResult = eval.evaluate(bindings); - if (ExpressionUtils.isError(newResult)) { - break; - } else if (ExpressionUtils.sameValue(result, newResult)) { - break; - } else { - result = newResult; - } - } - } - } catch (Exception e) { - // ignore - } - } - - if (result == null) { - writer.value(null); - } else if (ExpressionUtils.isError(result)) { - writer.object(); - writer.key("message"); writer.value(((EvalError) result).message); - writer.endObject(); - } else { - StringBuffer sb = new StringBuffer(); - - writeValue(sb, result, false); - - writer.value(sb.toString()); - } - } - writer.endArray(); - } catch (ParsingException e) { - writer.key("code"); writer.value("error"); - writer.key("type"); writer.value("parser"); - writer.key("message"); writer.value(e.getMessage()); - } catch (Exception e) { - writer.key("code"); writer.value("error"); - writer.key("type"); writer.value("other"); - writer.key("message"); writer.value(e.getMessage()); - } - - writer.endObject(); - } catch (Exception e) { - respondException(response, e); - } - } - - static protected void writeValue(StringBuffer sb, Object v, boolean quote) throws JSONException { - if (ExpressionUtils.isError(v)) { - sb.append("[error: " + ((EvalError) v).message + "]"); - } else { - if (v == null) { - sb.append("null"); - } else { - if (v instanceof WrappedCell) { - sb.append("[object Cell]"); - } else if (v instanceof WrappedRow) { - sb.append("[object Row]"); - } else if (v instanceof JSONObject) { - sb.append(((JSONObject) v).toString()); - } else if (v instanceof JSONArray) { - sb.append(((JSONArray) 
v).toString()); - } else if (ExpressionUtils.isArray(v)) { - Object[] a = (Object[]) v; - sb.append("[ "); - for (int i = 0; i < a.length; i++) { - if (i > 0) { - sb.append(", "); - } - writeValue(sb, a[i], true); - } - sb.append(" ]"); - } else if (ExpressionUtils.isArrayOrList(v)) { - List list = ExpressionUtils.toObjectList(v); - sb.append("[ "); - for (int i = 0; i < list.size(); i++) { - if (i > 0) { - sb.append(", "); - } - writeValue(sb, list.get(i), true); - } - sb.append(" ]"); - } else if (v instanceof HasFields) { - sb.append("[object " + v.getClass().getSimpleName() + "]"); - } else if (v instanceof Calendar) { - Calendar c = (Calendar) v; - - sb.append("[date " + - ParsingUtilities.dateToString(c.getTime()) +"]"); - } else if (v instanceof Date) { - sb.append("[date " + - ParsingUtilities.dateToString((Date) v) +"]"); - } else if (v instanceof String) { - if (quote) { - sb.append(JSONObject.quote((String) v)); - } else { - sb.append((String) v); - } - } else if (v instanceof Double || v instanceof Float) { - Number n = (Number) v; - if (n.doubleValue() - n.longValue() == 0.0) { - sb.append(n.longValue()); - } else { - sb.append(n.doubleValue()); - } - } else { - sb.append(v.toString()); - } - } - } - } -} diff --git a/main/src/com/google/gridworks/commands/freebase/GuessTypesOfColumnCommand.java b/main/src/com/google/gridworks/commands/freebase/GuessTypesOfColumnCommand.java deleted file mode 100644 index aeb583195..000000000 --- a/main/src/com/google/gridworks/commands/freebase/GuessTypesOfColumnCommand.java +++ /dev/null @@ -1,238 +0,0 @@ -package com.google.gridworks.commands.freebase; - -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringWriter; -import java.net.URL; -import java.net.URLConnection; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; 
-import java.util.Set; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.ParsingUtilities; - -public class GuessTypesOfColumnCommand extends Command { - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - String columnName = request.getParameter("columnName"); - String serviceUrl = request.getParameter("service"); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - JSONWriter writer = new JSONWriter(response.getWriter()); - writer.object(); - - Column column = project.columnModel.getColumnByName(columnName); - if (column == null) { - writer.key("code"); writer.value("error"); - writer.key("message"); writer.value("No such column"); - } else { - try { - writer.key("code"); writer.value("ok"); - writer.key("types"); writer.array(); - - List typeGroups = guessTypes(project, column, serviceUrl); - for (TypeGroup tg : typeGroups) { - writer.object(); - writer.key("id"); writer.value(tg.id); - writer.key("name"); writer.value(tg.name); - writer.key("score"); writer.value(tg.score); - writer.key("count"); writer.value(tg.count); - writer.endObject(); - } - - writer.endArray(); - } catch (Exception e) { - writer.key("code"); writer.value("error"); - } - } - - writer.endObject(); - } catch (Exception e) { - respondException(response, e); - } - } - - final static int s_sampleSize = 10; - - /** - * Run relevance searches for the 
first n cells in the given column and - * count the types of the results. Return a sorted list of types, from most - * frequent to least. - * - * @param project - * @param column - * @return - */ - protected List guessTypes(Project project, Column column, String serviceUrl) { - Map map = new HashMap(); - - int cellIndex = column.getCellIndex(); - - List samples = new ArrayList(s_sampleSize); - Set sampleSet = new HashSet(); - - for (Row row : project.rows) { - Object value = row.getCellValue(cellIndex); - if (ExpressionUtils.isNonBlankData(value)) { - String s = value.toString().trim(); - if (!sampleSet.contains(s)) { - samples.add(s); - sampleSet.add(s); - if (samples.size() >= s_sampleSize) { - break; - } - } - } - } - - StringWriter stringWriter = new StringWriter(); - try { - JSONWriter jsonWriter = new JSONWriter(stringWriter); - jsonWriter.object(); - for (int i = 0; i < samples.size(); i++) { - jsonWriter.key("q" + i); - jsonWriter.object(); - - jsonWriter.key("query"); jsonWriter.value(samples.get(i)); - jsonWriter.key("limit"); jsonWriter.value(3); - - jsonWriter.endObject(); - } - jsonWriter.endObject(); - } catch (JSONException e) { - // ignore - } - - String queriesString = stringWriter.toString(); - try { - URL url = new URL(serviceUrl); - URLConnection connection = url.openConnection(); - { - connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); - connection.setConnectTimeout(30000); - connection.setDoOutput(true); - - DataOutputStream dos = new DataOutputStream(connection.getOutputStream()); - try { - String body = "queries=" + ParsingUtilities.encode(queriesString); - - dos.writeBytes(body); - } finally { - dos.flush(); - dos.close(); - } - - connection.connect(); - } - - InputStream is = connection.getInputStream(); - try { - String s = ParsingUtilities.inputStreamToString(is); - JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); - - for (int i = 0; i < samples.size(); i++) { - String key = "q" + i; - if 
(!o.has(key)) { - continue; - } - - JSONObject o2 = o.getJSONObject(key); - if (!(o2.has("result"))) { - continue; - } - - JSONArray results = o2.getJSONArray("result"); - int count = results.length(); - - for (int j = 0; j < count; j++) { - JSONObject result = results.getJSONObject(j); - double score = 1.0 / (1 + j); // score by each result's rank - - JSONArray types = result.getJSONArray("type"); - int typeCount = types.length(); - - for (int t = 0; t < typeCount; t++) { - Object type = types.get(t); - String typeID; - String typeName; - - if (type instanceof String) { - typeID = typeName = (String) type; - } else { - typeID = ((JSONObject) type).getString("id"); - typeName = ((JSONObject) type).getString("name"); - } - - double score2 = score * (typeCount - t) / (double) typeCount; - if (map.containsKey(typeID)) { - TypeGroup tg = map.get(typeID); - tg.score += score2; - tg.count++; - } else { - map.put(typeID, new TypeGroup(typeID, typeName, score2)); - } - } - } - } - } finally { - is.close(); - } - } catch (Exception e) { - logger.error("Failed to guess cell types for load\n" + queriesString, e); - } - - List types = new ArrayList(map.values()); - Collections.sort(types, new Comparator() { - public int compare(TypeGroup o1, TypeGroup o2) { - int c = Math.min(s_sampleSize, o2.count) - Math.min(s_sampleSize, o1.count); - if (c != 0) { - return c; - } - return (int) Math.signum(o2.score / o2.count - o1.score / o1.count); - } - }); - - return types; - } - - static protected class TypeGroup { - String id; - String name; - int count; - double score; - - TypeGroup(String id, String name, double score) { - this.id = id; - this.name = name; - this.score = score; - this.count = 1; - } - } -} diff --git a/main/src/com/google/gridworks/commands/freebase/ImportQADataCommand.java b/main/src/com/google/gridworks/commands/freebase/ImportQADataCommand.java deleted file mode 100644 index abc7e599f..000000000 --- 
a/main/src/com/google/gridworks/commands/freebase/ImportQADataCommand.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.google.gridworks.commands.freebase; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.recon.ImportQADataOperation; -import com.google.gridworks.process.Process; - -public class ImportQADataCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - ProjectManager.singleton.setBusy(true); - try { - Project project = getProject(request); - - AbstractOperation op = new ImportQADataOperation(); - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } catch (Exception e) { - respondException(response, e); - } finally { - ProjectManager.singleton.setBusy(false); - } - } -} diff --git a/main/src/com/google/gridworks/commands/freebase/MQLReadCommand.java b/main/src/com/google/gridworks/commands/freebase/MQLReadCommand.java deleted file mode 100644 index ef5e75aa0..000000000 --- a/main/src/com/google/gridworks/commands/freebase/MQLReadCommand.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.google.gridworks.commands.freebase; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.oauth.OAuthUtilities; -import com.google.gridworks.oauth.Provider; -import com.google.gridworks.util.FreebaseUtils; - -public class 
MQLReadCommand extends Command { - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Provider provider = OAuthUtilities.getProvider(request); - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - String query = request.getParameter("query"); - String result = FreebaseUtils.mqlread(provider,query); - response.getWriter().write(result); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/freebase/MQLWriteCommand.java b/main/src/com/google/gridworks/commands/freebase/MQLWriteCommand.java deleted file mode 100644 index f865c504b..000000000 --- a/main/src/com/google/gridworks/commands/freebase/MQLWriteCommand.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.google.gridworks.commands.freebase; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.oauth.Credentials; -import com.google.gridworks.oauth.OAuthUtilities; -import com.google.gridworks.oauth.Provider; -import com.google.gridworks.util.FreebaseUtils; - -public class MQLWriteCommand extends Command { - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Provider provider = OAuthUtilities.getProvider(request); - - Credentials access_credentials = Credentials.getCredentials(request, provider, Credentials.Type.ACCESS); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - if (access_credentials != null) { - String query = request.getParameter("query"); - String result = FreebaseUtils.mqlwrite(access_credentials, provider, query); - response.getWriter().write(result); - } else { - 
respond(response, "401 Unauthorized", "You don't have the right credentials"); - } - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/freebase/PreviewProtographCommand.java b/main/src/com/google/gridworks/commands/freebase/PreviewProtographCommand.java deleted file mode 100644 index 32fbdff55..000000000 --- a/main/src/com/google/gridworks/commands/freebase/PreviewProtographCommand.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.google.gridworks.commands.freebase; - -import java.io.IOException; -import java.io.StringWriter; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONObject; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.Project; -import com.google.gridworks.protograph.Protograph; -import com.google.gridworks.protograph.transpose.MqlwriteLikeTransposedNodeFactory; -import com.google.gridworks.protograph.transpose.Transposer; -import com.google.gridworks.protograph.transpose.TripleLoaderTransposedNodeFactory; -import com.google.gridworks.util.ParsingUtilities; - -public class PreviewProtographCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - Engine engine = getEngine(request, project); - FilteredRows filteredRows = engine.getAllFilteredRows(); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - String jsonString = request.getParameter("protograph"); - JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString); - Protograph protograph = Protograph.reconstruct(json); - - StringBuffer sb = new StringBuffer(2048); - sb.append("{ 
"); - - { - StringWriter stringWriter = new StringWriter(); - TripleLoaderTransposedNodeFactory nodeFactory = new TripleLoaderTransposedNodeFactory(project, stringWriter); - - Transposer.transpose(project, filteredRows, protograph, protograph.getRootNode(0), nodeFactory); - nodeFactory.flush(); - - sb.append("\"tripleloader\" : "); - sb.append(JSONObject.quote(stringWriter.toString())); - } - - { - StringWriter stringWriter = new StringWriter(); - MqlwriteLikeTransposedNodeFactory nodeFactory = new MqlwriteLikeTransposedNodeFactory(stringWriter); - - Transposer.transpose(project, filteredRows, protograph, protograph.getRootNode(0), nodeFactory); - nodeFactory.flush(); - - sb.append(", \"mqllike\" : "); - sb.append(stringWriter.toString()); - } - - sb.append(" }"); - - respond(response, sb.toString()); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/freebase/SaveProtographCommand.java b/main/src/com/google/gridworks/commands/freebase/SaveProtographCommand.java deleted file mode 100644 index 57e2db1ee..000000000 --- a/main/src/com/google/gridworks/commands/freebase/SaveProtographCommand.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.google.gridworks.commands.freebase; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONObject; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.SaveProtographOperation; -import com.google.gridworks.process.Process; -import com.google.gridworks.protograph.Protograph; -import com.google.gridworks.util.ParsingUtilities; - -public class SaveProtographCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) 
- throws ServletException, IOException { - - try { - Project project = getProject(request); - - String jsonString = request.getParameter("protograph"); - JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString); - Protograph protograph = Protograph.reconstruct(json); - - AbstractOperation op = new SaveProtographOperation(protograph); - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/freebase/UploadDataCommand.java b/main/src/com/google/gridworks/commands/freebase/UploadDataCommand.java deleted file mode 100644 index a90440468..000000000 --- a/main/src/com/google/gridworks/commands/freebase/UploadDataCommand.java +++ /dev/null @@ -1,91 +0,0 @@ -package com.google.gridworks.commands.freebase; - -import java.io.IOException; -import java.io.StringWriter; -import java.net.URL; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; -import org.json.JSONObject; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.commands.Command; -import com.google.gridworks.exporters.ProtographTransposeExporter.TripleLoaderExporter; -import com.google.gridworks.model.Project; -import com.google.gridworks.preference.PreferenceStore; -import com.google.gridworks.util.FreebaseUtils; -import com.google.gridworks.util.ParsingUtilities; - -public class UploadDataCommand extends Command { - final static public String s_dataLoadJobIDPref = "freebase.load.jobID"; - final static public String s_dataLoadJobNamePref = "freebase.load.jobName"; - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, 
IOException { - - ProjectManager.singleton.setBusy(true); - try { - Project project = getProject(request); - Engine engine = getEngine(request, project); - PreferenceStore preferenceStore = project.getMetadata().getPreferenceStore(); - - TripleLoaderExporter exporter = new TripleLoaderExporter(); - StringWriter triples = new StringWriter(10 * 1024 * 1024); - exporter.export(project, new Properties(), engine, triples); - - String source_name = request.getParameter("source_name"); - String source_id = request.getParameter("source_id"); - String graph = request.getParameter("graph"); - String mdo_id = null; - - preferenceStore.put(s_dataLoadJobNamePref, source_name); - - try { - Integer jobID = (Integer) preferenceStore.get(s_dataLoadJobIDPref); - if (jobID != null) { - URL url = new URL("http://gridworks-loads.freebaseapps.com/job_id_to_mdo?job=" + jobID); - String s = ParsingUtilities.inputStreamToString(url.openConnection().getInputStream()); - - if (!s.equals("null")) { - mdo_id = s; - } - } - } catch (Exception e) { - // ignore - } - - String uploadResponse = FreebaseUtils.uploadTriples( - request, graph, source_name, source_id, mdo_id, triples.toString()); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - try { - JSONObject obj = new JSONObject(uploadResponse); - if (obj.has("result") && !obj.isNull("result")) { - JSONObject result = obj.getJSONObject("result"); - if (result.has("job_id") && !result.isNull("job_id")) { - Integer jobID = result.getInt("job_id"); - - project.getMetadata().getPreferenceStore().put( - s_dataLoadJobIDPref, jobID); - } - } - response.getWriter().write(uploadResponse); - } catch (JSONException e) { - respond(response,"500 Error", uploadResponse); - } - - } catch (Exception e) { - respondException(response, e); - } finally { - ProjectManager.singleton.setBusy(false); - } - } -} diff --git a/main/src/com/google/gridworks/commands/history/ApplyOperationsCommand.java 
b/main/src/com/google/gridworks/commands/history/ApplyOperationsCommand.java deleted file mode 100644 index 192e4c403..000000000 --- a/main/src/com/google/gridworks/commands/history/ApplyOperationsCommand.java +++ /dev/null @@ -1,60 +0,0 @@ -package com.google.gridworks.commands.history; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.OperationRegistry; -import com.google.gridworks.process.Process; -import com.google.gridworks.util.ParsingUtilities; - -public class ApplyOperationsCommand extends Command { - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - Project project = getProject(request); - String jsonString = request.getParameter("operations"); - try { - JSONArray a = ParsingUtilities.evaluateJsonStringToArray(jsonString); - int count = a.length(); - for (int i = 0; i < count; i++) { - JSONObject obj = a.getJSONObject(i); - - reconstructOperation(project, obj); - } - - if (project.processManager.hasPending()) { - respond(response, "{ \"code\" : \"pending\" }"); - } else { - respond(response, "{ \"code\" : \"ok\" }"); - } - } catch (JSONException e) { - respondException(response, e); - } - } - - protected void reconstructOperation(Project project, JSONObject obj) { - AbstractOperation operation = OperationRegistry.reconstruct(project, obj); - if (operation != null) { - try { - Process process = operation.createProcess(project, new Properties()); - - project.processManager.queueProcess(process); - } catch (Exception e) { - e.printStackTrace(); - } - } - } 
-} diff --git a/main/src/com/google/gridworks/commands/history/CancelProcessesCommand.java b/main/src/com/google/gridworks/commands/history/CancelProcessesCommand.java deleted file mode 100644 index 947d5d8c1..000000000 --- a/main/src/com/google/gridworks/commands/history/CancelProcessesCommand.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.google.gridworks.commands.history; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.Project; - -public class CancelProcessesCommand extends Command { - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - if( request == null ) throw new IllegalArgumentException("parameter 'request' should not be null"); - if( response == null ) throw new IllegalArgumentException("parameter 'request' should not be null"); - - try { - Project project = getProject(request); - project.getProcessManager().cancelAll(); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - response.getWriter().write("{ \"code\" : \"ok\" }"); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/history/GetHistoryCommand.java b/main/src/com/google/gridworks/commands/history/GetHistoryCommand.java deleted file mode 100644 index 379a3ca9c..000000000 --- a/main/src/com/google/gridworks/commands/history/GetHistoryCommand.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.google.gridworks.commands.history; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; - -import com.google.gridworks.commands.Command; -import 
com.google.gridworks.model.Project; - -public class GetHistoryCommand extends Command { - @Override - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - Project project = getProject(request); - try { - respondJSON(response, project.history); - } catch (JSONException e) { - respondException(response, e); - } - } - -} diff --git a/main/src/com/google/gridworks/commands/history/GetOperationsCommand.java b/main/src/com/google/gridworks/commands/history/GetOperationsCommand.java deleted file mode 100644 index 495b77995..000000000 --- a/main/src/com/google/gridworks/commands/history/GetOperationsCommand.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.google.gridworks.commands.history; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.Project; - -public class GetOperationsCommand extends Command { - @Override - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - Project project = getProject(request); - - try { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - Properties options = new Properties(); - JSONWriter writer = new JSONWriter(response.getWriter()); - - writer.object(); - writer.key("entries"); writer.array(); - - for (HistoryEntry entry : project.history.getLastPastEntries(-1)) { - writer.object(); - writer.key("description"); writer.value(entry.description); - if (entry.operation != null) { - writer.key("operation"); - entry.operation.write(writer, options); - } - writer.endObject(); - } - writer.endArray(); - writer.endObject(); - } 
catch (JSONException e) { - respondException(response, e); - } - } - -} diff --git a/main/src/com/google/gridworks/commands/history/GetProcessesCommand.java b/main/src/com/google/gridworks/commands/history/GetProcessesCommand.java deleted file mode 100644 index d0bd49c69..000000000 --- a/main/src/com/google/gridworks/commands/history/GetProcessesCommand.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.google.gridworks.commands.history; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.Project; - -public class GetProcessesCommand extends Command { - @Override - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - Project project = getProject(request); - - try { - respondJSON(response, project.processManager); - } catch (JSONException e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/history/UndoRedoCommand.java b/main/src/com/google/gridworks/commands/history/UndoRedoCommand.java deleted file mode 100644 index eaab0539a..000000000 --- a/main/src/com/google/gridworks/commands/history/UndoRedoCommand.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.google.gridworks.commands.history; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.history.HistoryProcess; -import com.google.gridworks.model.Project; - -public class UndoRedoCommand extends Command { - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - Project project = getProject(request); - - 
long lastDoneID = -1; - String lastDoneIDString = request.getParameter("lastDoneID"); - if (lastDoneIDString != null) { - lastDoneID = Long.parseLong(lastDoneIDString); - } else { - String undoIDString = request.getParameter("undoID"); - if (undoIDString != null) { - long undoID = Long.parseLong(undoIDString); - - lastDoneID = project.history.getPrecedingEntryID(undoID); - } - } - - try { - boolean done = lastDoneID == -1 || - project.processManager.queueProcess( - new HistoryProcess(project, lastDoneID)); - - respond(response, "{ \"code\" : " + (done ? "\"ok\"" : "\"pending\"") + " }"); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/project/CreateProjectCommand.java b/main/src/com/google/gridworks/commands/project/CreateProjectCommand.java deleted file mode 100644 index 4ce7b9a75..000000000 --- a/main/src/com/google/gridworks/commands/project/CreateProjectCommand.java +++ /dev/null @@ -1,459 +0,0 @@ -package com.google.gridworks.commands.project; - -import java.io.BufferedInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FilterInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.io.Serializable; -import java.io.UnsupportedEncodingException; -import java.net.URL; -import java.net.URLConnection; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Map.Entry; -import java.util.zip.GZIPInputStream; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.apache.commons.fileupload.FileItemIterator; -import 
org.apache.commons.fileupload.FileItemStream; -import org.apache.commons.fileupload.servlet.ServletFileUpload; -import org.apache.commons.fileupload.util.Streams; -import org.apache.tools.bzip2.CBZip2InputStream; -import org.apache.tools.tar.TarEntry; -import org.apache.tools.tar.TarInputStream; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.commands.Command; -import com.google.gridworks.importers.Importer; -import com.google.gridworks.importers.ImporterRegistry; -import com.google.gridworks.importers.ReaderImporter; -import com.google.gridworks.importers.StreamImporter; -import com.google.gridworks.importers.TsvCsvImporter; -import com.google.gridworks.importers.UrlImporter; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.IOUtils; -import com.google.gridworks.util.ParsingUtilities; -import com.ibm.icu.text.CharsetDetector; -import com.ibm.icu.text.CharsetMatch; - -public class CreateProjectCommand extends Command { - - final static Logger logger = LoggerFactory.getLogger("create-project_command"); - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - ProjectManager.singleton.setBusy(true); - try { - /* - * The uploaded file is in the POST body as a "file part". If - * we call request.getParameter() then the POST body will get - * read and we won't have a chance to parse the body ourselves. - * This is why we have to parse the URL for parameters ourselves. - * Don't call request.getParameter() before calling internalImport(). - */ - Properties options = ParsingUtilities.parseUrlParameters(request); - - Project project = new Project(); - ProjectMetadata pm = new ProjectMetadata(); - - internalImport(request, project, pm, options); - - /* - * The import process above populates options with parameters - * in the POST body. 
That's why we're constructing the project - * metadata object after calling internalImport(). - */ - pm.setName(options.getProperty("project-name")); - pm.setPassword(options.getProperty("project-password")); - pm.setEncoding(options.getProperty("encoding")); - pm.setEncodingConfidence(options.getProperty("encoding_confidence")); - ProjectManager.singleton.registerProject(project, pm); - - project.update(); - - redirect(response, "/project?project=" + project.id); - } catch (Exception e) { - redirect(response, "/error.html?redirect=index.html&msg=" + - ParsingUtilities.encode("Failed to import file: " + e.getLocalizedMessage()) - ); - e.printStackTrace(); - } finally { - ProjectManager.singleton.setBusy(false); - } - } - - protected void internalImport( - HttpServletRequest request, - Project project, - ProjectMetadata metadata, - Properties options - ) throws Exception { - - ServletFileUpload upload = new ServletFileUpload(); - String url = options.getProperty("url"); - boolean imported = false; - - FileItemIterator iter = upload.getItemIterator(request); - while (iter.hasNext()) { - FileItemStream item = iter.next(); - String name = item.getFieldName().toLowerCase(); - InputStream stream = item.openStream(); - if (item.isFormField()) { - if (name.equals("raw-text")) { - Reader reader = new InputStreamReader(stream,"UTF-8"); - try { - internalInvokeImporter(project, new TsvCsvImporter(), metadata, options, reader); - imported = true; - } finally { - reader.close(); - } - } else if (name.equals("project-url")) { - url = Streams.asString(stream); - } else { - options.put(name, Streams.asString(stream)); - } - } else { - String fileName = item.getName().toLowerCase(); - if (fileName.length() > 0) { - try { - internalImportFile(project, metadata, options, fileName, stream); - imported = true; - } finally { - stream.close(); - } - } - } - } - - if (!imported && url != null && url.length() > 0) { - internalImportURL(request, project, metadata, options, url); - } - } - - 
static class SafeInputStream extends FilterInputStream { - public SafeInputStream(InputStream stream) { - super(stream); - } - - @Override - public void close() { - // some libraries attempt to close the input stream while they can't - // read anymore from it... unfortunately this behavior prevents - // the zip input stream from functioning correctly so we just have - // to ignore those close() calls and just close it ourselves - // forcefully later - } - - public void reallyClose() throws IOException { - super.close(); - } - } - - protected void internalImportFile( - Project project, - ProjectMetadata metadata, - Properties options, - String fileName, - InputStream inputStream - ) throws Exception { - - logger.info("Importing '{}'", fileName); - - if (fileName.endsWith(".zip") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz") || fileName.endsWith(".tar.bz2")) { - - // first, save the file on disk, since we need two passes and we might - // not have enough memory to keep it all in there - File file = save(inputStream); - - // in the first pass, gather statistics about what files are in there - // unfortunately, we have to rely on files extensions, which is horrible but - // better than nothing - HashMap ext_map = new HashMap(); - - FileInputStream fis = new FileInputStream(file); - InputStream is = getStream(fileName, fis); - - // NOTE(SM): unfortunately, java.io does not provide any generalized class for - // archive-like input streams so while both TarInputStream and ZipInputStream - // behave precisely the same, there is no polymorphic behavior so we have - // to treat each instance explicitly... 
one of those times you wish you had - // closures - try { - if (is instanceof TarInputStream) { - TarInputStream tis = (TarInputStream) is; - TarEntry te; - while ((te = tis.getNextEntry()) != null) { - if (!te.isDirectory()) { - mapExtension(te.getName(),ext_map); - } - } - } else if (is instanceof ZipInputStream) { - ZipInputStream zis = (ZipInputStream) is; - ZipEntry ze; - while ((ze = zis.getNextEntry()) != null) { - if (!ze.isDirectory()) { - mapExtension(ze.getName(),ext_map); - } - } - } - } finally { - try { - is.close(); - fis.close(); - } catch (IOException e) {} - } - - // sort extensions by how often they appear - List> values = new ArrayList>(ext_map.entrySet()); - Collections.sort(values, new ValuesComparator()); - - if (values.size() == 0) { - throw new RuntimeException("The archive contains no files."); - } - - // this will contain the set of extensions we'll load from the archive - HashSet exts = new HashSet(); - - // find the extension that is most frequent or those who share the highest frequency value - if (values.size() == 1) { - exts.add(values.get(0).getKey()); - } else { - Entry most_frequent = values.get(0); - Entry second_most_frequent = values.get(1); - if (most_frequent.getValue() > second_most_frequent.getValue()) { // we have a winner - exts.add(most_frequent.getKey()); - } else { // multiple extensions have the same frequency - int winning_frequency = most_frequent.getValue(); - for (Entry e : values) { - if (e.getValue() == winning_frequency) { - exts.add(e.getKey()); - } - } - } - } - - logger.info("Most frequent extensions: {}", exts.toString()); - - // second pass, load the data for real - is = getStream(fileName, new FileInputStream(file)); - SafeInputStream sis = new SafeInputStream(is); - try { - if (is instanceof TarInputStream) { - TarInputStream tis = (TarInputStream) is; - TarEntry te; - while ((te = tis.getNextEntry()) != null) { - if (!te.isDirectory()) { - String name = te.getName(); - String ext = 
getExtension(name)[1]; - if (exts.contains(ext)) { - internalImportFile(project, metadata, options, name, sis); - } - } - } - } else if (is instanceof ZipInputStream) { - ZipInputStream zis = (ZipInputStream) is; - ZipEntry ze; - while ((ze = zis.getNextEntry()) != null) { - if (!ze.isDirectory()) { - String name = ze.getName(); - String ext = getExtension(name)[1]; - if (exts.contains(ext)) { - internalImportFile(project, metadata, options, name, sis); - } - } - } - } - } finally { - try { - sis.reallyClose(); - } catch (IOException e) {} - } - - } else if (fileName.endsWith(".gz")) { - internalImportFile(project, metadata, options, getExtension(fileName)[0], new GZIPInputStream(inputStream)); - } else if (fileName.endsWith(".bz2")) { - internalImportFile(project, metadata, options, getExtension(fileName)[0], new CBZip2InputStream(inputStream)); - } else { - load(project, metadata, options, fileName, inputStream); - } - } - - public static class ValuesComparator implements Comparator>, Serializable { - private static final long serialVersionUID = 8845863616149837657L; - - public int compare(Entry o1, Entry o2) { - return o2.getValue() - o1.getValue(); - } - } - - private void load(Project project, ProjectMetadata metadata, Properties options, String fileName, InputStream inputStream) throws Exception { - Importer importer = ImporterRegistry.guessImporter(null, fileName); - internalInvokeImporter(project, importer, metadata, options, inputStream, null); - } - - private File save(InputStream is) throws IOException { - File temp = this.servlet.getTempFile(Long.toString(System.currentTimeMillis())); - temp.deleteOnExit(); - IOUtils.copy(is,temp); - is.close(); - return temp; - } - - private void mapExtension(String name, Map ext_map) { - String ext = getExtension(name)[1]; - if (ext_map.containsKey(ext)) { - ext_map.put(ext, ext_map.get(ext) + 1); - } else { - ext_map.put(ext, 1); - } - } - - private InputStream getStream(String fileName, InputStream is) throws 
IOException { - if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) { - return new TarInputStream(new GZIPInputStream(is)); - } else if (fileName.endsWith(".tar.bz2")) { - return new TarInputStream(new CBZip2InputStream(is)); - } else { - return new ZipInputStream(is); - } - } - - private String[] getExtension(String filename) { - String[] result = new String[2]; - int ext_index = filename.lastIndexOf('.'); - result[0] = (ext_index == -1) ? filename : filename.substring(0,ext_index); - result[1] = (ext_index == -1) ? "" : filename.substring(ext_index + 1); - return result; - } - - protected void internalImportURL( - HttpServletRequest request, - Project project, - ProjectMetadata metadata, - Properties options, - String urlString) throws Exception { - - URL url = new URL(urlString); - URLConnection connection = null; - - // Try for a URL importer first - Importer importer = ImporterRegistry.guessUrlImporter(url); - if (importer instanceof UrlImporter) { - ((UrlImporter) importer).read(url, project, metadata, options); - } else { - // If we couldn't find one, try opening URL and treating as a stream - try { - connection = url.openConnection(); - connection.setConnectTimeout(5000); - connection.connect(); - } catch (Exception e) { - throw new Exception("Cannot connect to " + urlString, e); - } - - InputStream inputStream = null; - try { - inputStream = connection.getInputStream(); - } catch (Exception e) { - throw new Exception("Cannot retrieve content from " + url, e); - } - - try { - String contentType = connection.getContentType(); - int semicolon = contentType.indexOf(';'); - if (semicolon >= 0) { - contentType = contentType.substring(0, semicolon); - } - - importer = ImporterRegistry.guessImporter(contentType, url.getPath()); - - internalInvokeImporter(project, importer, metadata, options, inputStream, connection.getContentEncoding()); - } finally { - inputStream.close(); - } - } - } - - protected void internalInvokeImporter( - Project project, - 
Importer importer, - ProjectMetadata metadata, - Properties options, - InputStream rawInputStream, - String encoding - ) throws Exception { - if (importer instanceof ReaderImporter) { - - BufferedInputStream inputStream = new BufferedInputStream(rawInputStream); - - // NOTE(SM): The ICU4J char detection code requires the input stream to support mark/reset. - // Unfortunately, not all ServletInputStream implementations are marking, so we need do - // this memory-expensive wrapping to make it work. It's far from ideal but I don't have - // a more efficient solution. - byte[] bytes = new byte[1024 * 4]; - inputStream.mark(bytes.length); - inputStream.read(bytes); - inputStream.reset(); - - CharsetDetector detector = new CharsetDetector(); - detector.setDeclaredEncoding("utf8"); // most of the content on the web is encoded in UTF-8 so start with that - - Reader reader = null; - CharsetMatch[] charsetMatches = detector.setText(bytes).detectAll(); - for (CharsetMatch charsetMatch : charsetMatches) { - try { - reader = new InputStreamReader(inputStream, charsetMatch.getName()); - - options.setProperty("encoding", charsetMatch.getName()); - options.setProperty("encoding_confidence", Integer.toString(charsetMatch.getConfidence())); - - logger.info("Best encoding guess: {} [confidence: {}]", charsetMatch.getName(), charsetMatch.getConfidence()); - - break; - } catch (UnsupportedEncodingException e) { - // silent - } - } - - if (reader == null) { // when all else fails - reader = encoding != null ? 
- new InputStreamReader(inputStream, encoding) : - new InputStreamReader(inputStream); - } - - ((ReaderImporter) importer).read(reader, project, metadata, options); - } else { - ((StreamImporter) importer).read(rawInputStream, project, metadata, options); - } - } - - protected void internalInvokeImporter( - Project project, - ReaderImporter importer, - ProjectMetadata metadata, - Properties options, - Reader reader - ) throws Exception { - importer.read(reader, project, metadata, options); - } - -} diff --git a/main/src/com/google/gridworks/commands/project/DeleteProjectCommand.java b/main/src/com/google/gridworks/commands/project/DeleteProjectCommand.java deleted file mode 100644 index 4b06dec11..000000000 --- a/main/src/com/google/gridworks/commands/project/DeleteProjectCommand.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.google.gridworks.commands.project; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.commands.Command; - -public class DeleteProjectCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - long projectID = Long.parseLong(request.getParameter("project")); - - ProjectManager.singleton.deleteProject(projectID); - - respond(response, "{ \"code\" : \"ok\" }"); - - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/project/ExportProjectCommand.java b/main/src/com/google/gridworks/commands/project/ExportProjectCommand.java deleted file mode 100644 index 1e6f905ec..000000000 --- a/main/src/com/google/gridworks/commands/project/ExportProjectCommand.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.google.gridworks.commands.project; - -import java.io.IOException; -import 
java.io.OutputStream; -import java.util.zip.GZIPOutputStream; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.apache.tools.tar.TarOutputStream; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.Project; - -public class ExportProjectCommand extends Command { - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - ProjectManager.singleton.ensureProjectSaved(project.id); - - response.setHeader("Content-Type", "application/x-gzip"); - - OutputStream os = response.getOutputStream(); - try { - gzipTarToOutputStream(project, os); - } finally { - os.close(); - } - } catch (Exception e) { - respondException(response, e); - } - } - - protected void gzipTarToOutputStream(Project project, OutputStream os) throws IOException { - GZIPOutputStream gos = new GZIPOutputStream(os); - try { - tarToOutputStream(project, gos); - } finally { - gos.close(); - } - } - - protected void tarToOutputStream(Project project, OutputStream os) throws IOException { - TarOutputStream tos = new TarOutputStream(os); - try { - ProjectManager.singleton.exportProject(project.id, tos); - } finally { - tos.close(); - } - } - - -} diff --git a/main/src/com/google/gridworks/commands/project/ExportRowsCommand.java b/main/src/com/google/gridworks/commands/project/ExportRowsCommand.java deleted file mode 100644 index d3c6fa564..000000000 --- a/main/src/com/google/gridworks/commands/project/ExportRowsCommand.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.google.gridworks.commands.project; - -import java.io.IOException; -import java.io.PrintWriter; -import java.util.Enumeration; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import 
javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.commands.Command; -import com.google.gridworks.exporters.CsvExporter; -import com.google.gridworks.exporters.Exporter; -import com.google.gridworks.exporters.ExporterRegistry; -import com.google.gridworks.model.Project; - -public class ExportRowsCommand extends Command { - - @SuppressWarnings("unchecked") - static public Properties getRequestParameters(HttpServletRequest request) { - Properties options = new Properties(); - - Enumeration en = request.getParameterNames(); - while (en.hasMoreElements()) { - String name = en.nextElement(); - options.put(name, request.getParameter(name)); - } - return options; - } - - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - ProjectManager.singleton.setBusy(true); - try { - Project project = getProject(request); - Engine engine = getEngine(request, project); - String format = request.getParameter("format"); - Properties options = getRequestParameters(request); - - Exporter exporter = ExporterRegistry.getExporter(format); - if (exporter == null) { - exporter = new CsvExporter('\t'); - } - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", exporter.getContentType()); - - if (exporter.takeWriter()) { - PrintWriter writer = response.getWriter(); - exporter.export(project, options, engine, writer); - } else { - exporter.export(project, options, engine, response.getOutputStream()); - } - } catch (Exception e) { - respondException(response, e); - } finally { - ProjectManager.singleton.setBusy(false); - } - } -} diff --git a/main/src/com/google/gridworks/commands/project/GetModelsCommand.java b/main/src/com/google/gridworks/commands/project/GetModelsCommand.java deleted file mode 100644 index ec8077fb0..000000000 --- 
a/main/src/com/google/gridworks/commands/project/GetModelsCommand.java +++ /dev/null @@ -1,66 +0,0 @@ -package com.google.gridworks.commands.project; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.MetaParser.LanguageInfo; -import com.google.gridworks.model.OverlayModel; -import com.google.gridworks.model.Project; - -public class GetModelsCommand extends Command { - @Override - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - Project project = getProject(request); - - try { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - Properties options = new Properties(); - JSONWriter writer = new JSONWriter(response.getWriter()); - - writer.object(); - writer.key("columnModel"); project.columnModel.write(writer, options); - writer.key("recordModel"); project.recordModel.write(writer, options); - - writer.key("overlayModels"); writer.object(); - for (String modelName : project.overlayModels.keySet()) { - OverlayModel overlayModel = project.overlayModels.get(modelName); - if (overlayModel != null) { - writer.key(modelName); - - project.overlayModels.get(modelName).write(writer, options); - } - } - writer.endObject(); - - writer.key("scripting"); writer.object(); - for (String languagePrefix : MetaParser.getLanguagePrefixes()) { - LanguageInfo info = MetaParser.getLanguageInfo(languagePrefix); - - writer.key(languagePrefix); - writer.object(); - writer.key("name"); writer.value(info.name); - writer.key("defaultExpression"); writer.value(info.defaultExpression); - writer.endObject(); - } - writer.endObject(); - - 
writer.endObject(); - } catch (JSONException e) { - respondException(response, e); - } - } - -} diff --git a/main/src/com/google/gridworks/commands/project/GetProjectMetadataCommand.java b/main/src/com/google/gridworks/commands/project/GetProjectMetadataCommand.java deleted file mode 100644 index 2b4155bbb..000000000 --- a/main/src/com/google/gridworks/commands/project/GetProjectMetadataCommand.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.google.gridworks.commands.project; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.Project; - -public class GetProjectMetadataCommand extends Command { - @Override - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - Project project = getProject(request); - - try { - respondJSON(response, ProjectManager.singleton.getProjectMetadata(project.id)); - } catch (JSONException e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/project/ImportProjectCommand.java b/main/src/com/google/gridworks/commands/project/ImportProjectCommand.java deleted file mode 100644 index 93fe6ff2d..000000000 --- a/main/src/com/google/gridworks/commands/project/ImportProjectCommand.java +++ /dev/null @@ -1,135 +0,0 @@ -package com.google.gridworks.commands.project; - -import java.io.IOException; -import java.io.InputStream; -import java.net.URL; -import java.net.URLConnection; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.apache.commons.fileupload.FileItemIterator; -import org.apache.commons.fileupload.FileItemStream; -import 
org.apache.commons.fileupload.servlet.ServletFileUpload; -import org.apache.commons.fileupload.util.Streams; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.ParsingUtilities; - -public class ImportProjectCommand extends Command { - - final static Logger logger = LoggerFactory.getLogger("import-project_command"); - - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - ProjectManager.singleton.setBusy(true); - try { - Properties options = ParsingUtilities.parseUrlParameters(request); - - long projectID = Project.generateID(); - logger.info("Importing existing project using new ID {}", projectID); - - internalImport(request, options, projectID); - - ProjectManager.singleton.loadProjectMetadata(projectID); - - ProjectMetadata pm = ProjectManager.singleton.getProjectMetadata(projectID); - if (pm != null) { - if (options.containsKey("project-name")) { - String projectName = options.getProperty("project-name"); - if (projectName != null && projectName.length() > 0) { - pm.setName(projectName); - } - } - - redirect(response, "/project?project=" + projectID); - } else { - redirect(response, "/error.html?redirect=index&msg=" + - ParsingUtilities.encode("Failed to import project") - ); - } - } catch (Exception e) { - e.printStackTrace(); - } finally { - ProjectManager.singleton.setBusy(false); - } - } - - protected void internalImport( - HttpServletRequest request, - Properties options, - long projectID - ) throws Exception { - - String url = null; - - ServletFileUpload upload = new ServletFileUpload(); - - FileItemIterator iter = upload.getItemIterator(request); - while (iter.hasNext()) { - FileItemStream item = iter.next(); - String name = 
item.getFieldName().toLowerCase(); - InputStream stream = item.openStream(); - if (item.isFormField()) { - if (name.equals("url")) { - url = Streams.asString(stream); - } else { - options.put(name, Streams.asString(stream)); - } - } else { - String fileName = item.getName().toLowerCase(); - try { - ProjectManager.singleton.importProject(projectID, stream, !fileName.endsWith(".tar")); - } finally { - stream.close(); - } - } - } - - if (url != null && url.length() > 0) { - internalImportURL(request, options, projectID, url); - } - } - - protected void internalImportURL( - HttpServletRequest request, - Properties options, - long projectID, - String urlString - ) throws Exception { - URL url = new URL(urlString); - URLConnection connection = null; - - try { - connection = url.openConnection(); - connection.setConnectTimeout(5000); - connection.connect(); - } catch (Exception e) { - throw new Exception("Cannot connect to " + urlString, e); - } - - InputStream inputStream = null; - try { - inputStream = connection.getInputStream(); - } catch (Exception e) { - throw new Exception("Cannot retrieve content from " + url, e); - } - - try { - ProjectManager.singleton.importProject(projectID, inputStream, !urlString.endsWith(".tar")); - } finally { - inputStream.close(); - } - } - - -} diff --git a/main/src/com/google/gridworks/commands/project/RenameProjectCommand.java b/main/src/com/google/gridworks/commands/project/RenameProjectCommand.java deleted file mode 100644 index 45bef4ca6..000000000 --- a/main/src/com/google/gridworks/commands/project/RenameProjectCommand.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.google.gridworks.commands.project; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.commands.Command; - -public class RenameProjectCommand extends Command { - @Override - 
public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - String name = request.getParameter("name"); - ProjectMetadata pm = getProjectMetadata(request); - - pm.setName(name); - - respond(response, "{ \"code\" : \"ok\" }"); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/recon/ReconDiscardJudgmentsCommand.java b/main/src/com/google/gridworks/commands/recon/ReconDiscardJudgmentsCommand.java deleted file mode 100644 index f8b09b4b6..000000000 --- a/main/src/com/google/gridworks/commands/recon/ReconDiscardJudgmentsCommand.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.google.gridworks.commands.recon; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.recon.ReconDiscardJudgmentsOperation; - -public class ReconDiscardJudgmentsCommand extends EngineDependentCommand { - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String columnName = request.getParameter("columnName"); - - return new ReconDiscardJudgmentsOperation(engineConfig, columnName); - } -} diff --git a/main/src/com/google/gridworks/commands/recon/ReconJudgeOneCellCommand.java b/main/src/com/google/gridworks/commands/recon/ReconJudgeOneCellCommand.java deleted file mode 100644 index 02b8bea0c..000000000 --- a/main/src/com/google/gridworks/commands/recon/ReconJudgeOneCellCommand.java +++ /dev/null @@ -1,214 +0,0 @@ -package com.google.gridworks.commands.recon; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import 
javax.servlet.http.HttpServletResponse; - -import org.json.JSONWriter; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.ReconStats; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.model.changes.ReconChange; -import com.google.gridworks.process.QuickHistoryEntryProcess; -import com.google.gridworks.util.Pool; - -public class ReconJudgeOneCellCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - - int rowIndex = Integer.parseInt(request.getParameter("row")); - int cellIndex = Integer.parseInt(request.getParameter("cell")); - Judgment judgment = Recon.stringToJudgment(request.getParameter("judgment")); - - ReconCandidate match = null; - String id = request.getParameter("id"); - if (id != null) { - String scoreString = request.getParameter("score"); - - match = new ReconCandidate( - id, - request.getParameter("name"), - request.getParameter("types").split(","), - scoreString != null ? 
Double.parseDouble(scoreString) : 100 - ); - } - - JudgeOneCellProcess process = new JudgeOneCellProcess( - project, - "Judge one cell's recon result", - judgment, - rowIndex, - cellIndex, - match, - request.getParameter("identifierSpace"), - request.getParameter("schemaSpace") - ); - - HistoryEntry historyEntry = project.processManager.queueProcess(process); - if (historyEntry != null) { - /* - * If the process is done, write back the cell's data so that the - * client side can update its UI right away. - */ - JSONWriter writer = new JSONWriter(response.getWriter()); - - Pool pool = new Pool(); - Properties options = new Properties(); - options.put("pool", pool); - - writer.object(); - writer.key("code"); writer.value("ok"); - writer.key("historyEntry"); historyEntry.write(writer, options); - writer.key("cell"); process.newCell.write(writer, options); - writer.key("pool"); pool.write(writer, options); - writer.endObject(); - } else { - respond(response, "{ \"code\" : \"pending\" }"); - } - } catch (Exception e) { - respondException(response, e); - } - } - - protected static class JudgeOneCellProcess extends QuickHistoryEntryProcess { - - final int rowIndex; - final int cellIndex; - final Judgment judgment; - final ReconCandidate match; - final String identifierSpace; - final String schemaSpace; - - Cell newCell; - - JudgeOneCellProcess( - Project project, - String briefDescription, - Judgment judgment, - int rowIndex, - int cellIndex, - ReconCandidate match, - String identifierSpace, - String schemaSpace - ) { - super(project, briefDescription); - - this.judgment = judgment; - this.rowIndex = rowIndex; - this.cellIndex = cellIndex; - this.match = match; - this.identifierSpace = identifierSpace; - this.schemaSpace = schemaSpace; - } - - protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception { - Cell cell = _project.rows.get(rowIndex).getCell(cellIndex); - if (cell == null || !ExpressionUtils.isNonBlankData(cell.value)) { - throw new 
Exception("Cell is blank or error"); - } - - Column column = _project.columnModel.getColumnByCellIndex(cellIndex); - if (column == null) { - throw new Exception("No such column"); - } - - Judgment oldJudgment = cell.recon == null ? Judgment.None : cell.recon.judgment; - - newCell = new Cell( - cell.value, - cell.recon == null ? new Recon(historyEntryID, identifierSpace, schemaSpace) : cell.recon.dup(historyEntryID) - ); - - String cellDescription = - "single cell on row " + (rowIndex + 1) + - ", column " + column.getName() + - ", containing \"" + cell.value + "\""; - - String description = null; - - newCell.recon.matchRank = -1; - newCell.recon.judgmentAction = "single"; - newCell.recon.judgmentBatchSize = 1; - - if (judgment == Judgment.None) { - newCell.recon.judgment = Recon.Judgment.None; - newCell.recon.match = null; - - description = "Discard recon judgment for " + cellDescription; - } else if (judgment == Judgment.New) { - newCell.recon.judgment = Recon.Judgment.New; - newCell.recon.match = null; - - description = "Mark to create new topic for " + cellDescription; - } else { - newCell.recon.judgment = Recon.Judgment.Matched; - newCell.recon.match = this.match; - if (newCell.recon.candidates != null) { - for (int m = 0; m < newCell.recon.candidates.size(); m++) { - if (newCell.recon.candidates.get(m).id.equals(this.match.id)) { - newCell.recon.matchRank = m; - break; - } - } - } - - description = "Match " + this.match.name + - " (" + match.id + ") to " + - cellDescription; - } - - ReconStats stats = column.getReconStats(); - if (stats == null) { - stats = ReconStats.create(_project, cellIndex); - } else { - int newChange = 0; - int matchChange = 0; - - if (oldJudgment == Judgment.New) { - newChange--; - } - if (oldJudgment == Judgment.Matched) { - matchChange--; - } - if (newCell.recon.judgment == Judgment.New) { - newChange++; - } - if (newCell.recon.judgment == Judgment.Matched) { - matchChange++; - } - - stats = new ReconStats( - stats.nonBlanks, - 
stats.newTopics + newChange, - stats.matchedTopics + matchChange); - } - - Change change = new ReconChange( - new CellChange(rowIndex, cellIndex, cell, newCell), - column.getName(), - column.getReconConfig(), - stats - ); - - return new HistoryEntry( - historyEntryID, _project, description, null, change); - } - } -} diff --git a/main/src/com/google/gridworks/commands/recon/ReconJudgeSimilarCellsCommand.java b/main/src/com/google/gridworks/commands/recon/ReconJudgeSimilarCellsCommand.java deleted file mode 100644 index 617df273a..000000000 --- a/main/src/com/google/gridworks/commands/recon/ReconJudgeSimilarCellsCommand.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.google.gridworks.commands.recon; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.operations.recon.ReconJudgeSimilarCellsOperation; - -public class ReconJudgeSimilarCellsCommand extends EngineDependentCommand { - - @Override - protected AbstractOperation createOperation( - Project project, HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String columnName = request.getParameter("columnName"); - String similarValue = request.getParameter("similarValue"); - Judgment judgment = Recon.stringToJudgment(request.getParameter("judgment")); - - ReconCandidate match = null; - String id = request.getParameter("id"); - if (id != null) { - String scoreString = request.getParameter("score"); - - match = new ReconCandidate( - id, - request.getParameter("name"), - request.getParameter("types").split(","), - scoreString != null ? 
Double.parseDouble(scoreString) : 100 - ); - } - - String shareNewTopics = request.getParameter("shareNewTopics"); - - return new ReconJudgeSimilarCellsOperation( - engineConfig, - columnName, - similarValue, - judgment, - match, - "true".equals(shareNewTopics) - ); - } -} diff --git a/main/src/com/google/gridworks/commands/recon/ReconMarkNewTopicsCommand.java b/main/src/com/google/gridworks/commands/recon/ReconMarkNewTopicsCommand.java deleted file mode 100644 index 12ba4315c..000000000 --- a/main/src/com/google/gridworks/commands/recon/ReconMarkNewTopicsCommand.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.google.gridworks.commands.recon; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.recon.ReconMarkNewTopicsOperation; - -public class ReconMarkNewTopicsCommand extends EngineDependentCommand { - - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - return new ReconMarkNewTopicsOperation( - engineConfig, - request.getParameter("columnName"), - "true".equals(request.getParameter("shareNewTopics")) - ); - } -} diff --git a/main/src/com/google/gridworks/commands/recon/ReconMatchBestCandidatesCommand.java b/main/src/com/google/gridworks/commands/recon/ReconMatchBestCandidatesCommand.java deleted file mode 100644 index 15936c898..000000000 --- a/main/src/com/google/gridworks/commands/recon/ReconMatchBestCandidatesCommand.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.google.gridworks.commands.recon; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; 
-import com.google.gridworks.operations.recon.ReconMatchBestCandidatesOperation; - -public class ReconMatchBestCandidatesCommand extends EngineDependentCommand { - - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String columnName = request.getParameter("columnName"); - - return new ReconMatchBestCandidatesOperation(engineConfig, columnName); - } -} diff --git a/main/src/com/google/gridworks/commands/recon/ReconMatchSpecificTopicCommand.java b/main/src/com/google/gridworks/commands/recon/ReconMatchSpecificTopicCommand.java deleted file mode 100644 index 187bef55f..000000000 --- a/main/src/com/google/gridworks/commands/recon/ReconMatchSpecificTopicCommand.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.google.gridworks.commands.recon; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.operations.recon.ReconMatchSpecificTopicOperation; - -public class ReconMatchSpecificTopicCommand extends EngineDependentCommand { - - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String columnName = request.getParameter("columnName"); - ReconCandidate match = new ReconCandidate( - request.getParameter("topicID"), - request.getParameter("topicName"), - request.getParameter("types").split(","), - 100 - ); - - return new ReconMatchSpecificTopicOperation( - engineConfig, - columnName, - match, - request.getParameter("identifierSpace"), - request.getParameter("schemaSpace") - ); - } -} diff --git a/main/src/com/google/gridworks/commands/recon/ReconcileCommand.java 
b/main/src/com/google/gridworks/commands/recon/ReconcileCommand.java deleted file mode 100644 index b7141c255..000000000 --- a/main/src/com/google/gridworks/commands/recon/ReconcileCommand.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.google.gridworks.commands.recon; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; -import org.json.JSONTokener; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.recon.ReconConfig; -import com.google.gridworks.operations.recon.ReconOperation; - -public class ReconcileCommand extends EngineDependentCommand { - - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String columnName = request.getParameter("columnName"); - String configString = request.getParameter("config"); - - JSONTokener t = new JSONTokener(configString); - JSONObject config = (JSONObject) t.nextValue(); - - return new ReconOperation(engineConfig, columnName, ReconConfig.reconstruct(config)); - } -} diff --git a/main/src/com/google/gridworks/commands/row/AnnotateOneRowCommand.java b/main/src/com/google/gridworks/commands/row/AnnotateOneRowCommand.java deleted file mode 100644 index 944155d74..000000000 --- a/main/src/com/google/gridworks/commands/row/AnnotateOneRowCommand.java +++ /dev/null @@ -1,120 +0,0 @@ -package com.google.gridworks.commands.row; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.changes.RowFlagChange; -import com.google.gridworks.model.changes.RowStarChange; -import 
com.google.gridworks.process.QuickHistoryEntryProcess; - -public class AnnotateOneRowCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - try { - Project project = getProject(request); - - int rowIndex = Integer.parseInt(request.getParameter("row")); - - String starredString = request.getParameter("starred"); - if (starredString != null) { - boolean starred = "true".endsWith(starredString); - String description = (starred ? "Star row " : "Unstar row ") + (rowIndex + 1); - - StarOneRowProcess process = new StarOneRowProcess( - project, - description, - rowIndex, - starred - ); - - performProcessAndRespond(request, response, project, process); - return; - } - - String flaggedString = request.getParameter("flagged"); - if (flaggedString != null) { - boolean flagged = "true".endsWith(flaggedString); - String description = (flagged ? "Flag row " : "Unflag row ") + (rowIndex + 1); - - FlagOneRowProcess process = new FlagOneRowProcess( - project, - description, - rowIndex, - flagged - ); - - performProcessAndRespond(request, response, project, process); - return; - } - - respond(response, "{ \"code\" : \"error\", \"message\" : \"invalid command parameters\" }"); - - } catch (Exception e) { - respondException(response, e); - } - } - - protected static class StarOneRowProcess extends QuickHistoryEntryProcess { - final int rowIndex; - final boolean starred; - - StarOneRowProcess( - Project project, - String briefDescription, - int rowIndex, - boolean starred - ) { - super(project, briefDescription); - - this.rowIndex = rowIndex; - this.starred = starred; - } - - protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception { - return new HistoryEntry( - historyEntryID, - _project, - (starred ? 
"Star row " : "Unstar row ") + (rowIndex + 1), - null, - new RowStarChange(rowIndex, starred) - ); - } - } - protected static class FlagOneRowProcess extends QuickHistoryEntryProcess { - final int rowIndex; - final boolean flagged; - - FlagOneRowProcess( - Project project, - String briefDescription, - int rowIndex, - boolean flagged - ) { - super(project, briefDescription); - - this.rowIndex = rowIndex; - this.flagged = flagged; - } - - protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception { - return new HistoryEntry( - historyEntryID, - _project, - (flagged ? "Flag row " : "Unflag row ") + (rowIndex + 1), - null, - new RowFlagChange(rowIndex, flagged) - ); - } - } -} diff --git a/main/src/com/google/gridworks/commands/row/AnnotateRowsCommand.java b/main/src/com/google/gridworks/commands/row/AnnotateRowsCommand.java deleted file mode 100644 index 3bd98159c..000000000 --- a/main/src/com/google/gridworks/commands/row/AnnotateRowsCommand.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.google.gridworks.commands.row; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.row.RowFlagOperation; -import com.google.gridworks.operations.row.RowStarOperation; - -public class AnnotateRowsCommand extends EngineDependentCommand { - - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String starredString = request.getParameter("starred"); - if (starredString != null) { - boolean starred = "true".endsWith(starredString); - - return new RowStarOperation(engineConfig, starred); - } - - String flaggedString = request.getParameter("flagged"); - if (flaggedString != null) { - boolean flagged = "true".endsWith(flaggedString); - - return 
new RowFlagOperation(engineConfig, flagged); - } - return null; - } -} diff --git a/main/src/com/google/gridworks/commands/row/DenormalizeCommand.java b/main/src/com/google/gridworks/commands/row/DenormalizeCommand.java deleted file mode 100644 index 68a6809b3..000000000 --- a/main/src/com/google/gridworks/commands/row/DenormalizeCommand.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.google.gridworks.commands.row; - -import java.io.IOException; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.row.DenormalizeOperation; -import com.google.gridworks.process.Process; - -public class DenormalizeCommand extends Command { - @Override - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - - AbstractOperation op = new DenormalizeOperation(); - Process process = op.createProcess(project, new Properties()); - - performProcessAndRespond(request, response, project, process); - } catch (Exception e) { - respondException(response, e); - } - } -} diff --git a/main/src/com/google/gridworks/commands/row/GetRowsCommand.java b/main/src/com/google/gridworks/commands/row/GetRowsCommand.java deleted file mode 100644 index 241a7580d..000000000 --- a/main/src/com/google/gridworks/commands/row/GetRowsCommand.java +++ /dev/null @@ -1,208 +0,0 @@ -package com.google.gridworks.commands.row; - -import java.io.IOException; -import java.io.PrintWriter; -import java.util.Properties; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; -import org.json.JSONObject; 
-import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRecords; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RecordVisitor; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.browsing.Engine.Mode; -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.model.Row; -import com.google.gridworks.sorting.SortingRecordVisitor; -import com.google.gridworks.sorting.SortingRowVisitor; -import com.google.gridworks.util.ParsingUtilities; -import com.google.gridworks.util.Pool; - -public class GetRowsCommand extends Command { - - public void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - internalRespond(request, response); - } - - @Override - public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - internalRespond(request, response); - } - - protected void internalRespond(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - Project project = getProject(request); - Engine engine = getEngine(request, project); - String callback = request.getParameter("callback"); - - int start = Math.min(project.rows.size(), Math.max(0, getIntegerParameter(request, "start", 0))); - int limit = Math.min(project.rows.size() - start, Math.max(0, getIntegerParameter(request, "limit", 20))); - - Pool pool = new Pool(); - Properties options = new Properties(); - options.put("project", project); - options.put("reconCandidateOmitTypes", true); - options.put("pool", pool); - - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", callback == null ? 
"application/json" : "text/javascript"); - - PrintWriter writer = response.getWriter(); - if (callback != null) { - writer.write(callback); - writer.write("("); - } - - JSONWriter jsonWriter = new JSONWriter(writer); - jsonWriter.object(); - - RowWritingVisitor rwv = new RowWritingVisitor(start, limit, jsonWriter, options); - - JSONObject sortingJson = null; - try{ - String json = request.getParameter("sorting"); - sortingJson = (json == null) ? null : - ParsingUtilities.evaluateJsonStringToObject(json); - } catch (JSONException e) { - } - - if (engine.getMode() == Mode.RowBased) { - FilteredRows filteredRows = engine.getAllFilteredRows(); - RowVisitor visitor = rwv; - - if (sortingJson != null) { - SortingRowVisitor srv = new SortingRowVisitor(visitor); - - srv.initializeFromJSON(project, sortingJson); - if (srv.hasCriteria()) { - visitor = srv; - } - } - - jsonWriter.key("mode"); jsonWriter.value("row-based"); - jsonWriter.key("rows"); jsonWriter.array(); - filteredRows.accept(project, visitor); - jsonWriter.endArray(); - jsonWriter.key("filtered"); jsonWriter.value(rwv.total); - jsonWriter.key("total"); jsonWriter.value(project.rows.size()); - } else { - FilteredRecords filteredRecords = engine.getFilteredRecords(); - RecordVisitor visitor = rwv; - - if (sortingJson != null) { - SortingRecordVisitor srv = new SortingRecordVisitor(visitor); - - srv.initializeFromJSON(project, sortingJson); - if (srv.hasCriteria()) { - visitor = srv; - } - } - - jsonWriter.key("mode"); jsonWriter.value("record-based"); - jsonWriter.key("rows"); jsonWriter.array(); - filteredRecords.accept(project, visitor); - jsonWriter.endArray(); - jsonWriter.key("filtered"); jsonWriter.value(rwv.total); - jsonWriter.key("total"); jsonWriter.value(project.recordModel.getRecordCount()); - } - - - jsonWriter.key("start"); jsonWriter.value(start); - jsonWriter.key("limit"); jsonWriter.value(limit); - jsonWriter.key("pool"); pool.write(jsonWriter, options); - - jsonWriter.endObject(); - - if 
(callback != null) { - writer.write(")"); - } - } catch (Exception e) { - respondException(response, e); - } - } - - static protected class RowWritingVisitor implements RowVisitor, RecordVisitor { - final int start; - final int limit; - final JSONWriter writer; - final Properties options; - - public int total; - - public RowWritingVisitor(int start, int limit, JSONWriter writer, Properties options) { - this.start = start; - this.limit = limit; - this.writer = writer; - this.options = options; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - if (total >= start && total < start + limit) { - internalVisit(project, rowIndex, row); - } - total++; - - return false; - } - - @Override - public boolean visit(Project project, Record record) { - if (total >= start && total < start + limit) { - internalVisit(project, record); - } - total++; - - return false; - } - - public boolean internalVisit(Project project, int rowIndex, Row row) { - try { - options.put("rowIndex", rowIndex); - row.write(writer, options); - } catch (JSONException e) { - } - return false; - } - - protected boolean internalVisit(Project project, Record record) { - options.put("recordIndex", record.recordIndex); - - for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { - try { - Row row = project.rows.get(r); - - options.put("rowIndex", r); - - row.write(writer, options); - - } catch (JSONException e) { - } - - options.remove("recordIndex"); - } - return false; - } - } -} diff --git a/main/src/com/google/gridworks/commands/row/RemoveRowsCommand.java b/main/src/com/google/gridworks/commands/row/RemoveRowsCommand.java deleted file mode 100644 index 2a5b7cfab..000000000 --- a/main/src/com/google/gridworks/commands/row/RemoveRowsCommand.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.google.gridworks.commands.row; - -import 
javax.servlet.http.HttpServletRequest; - -import org.json.JSONObject; - -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.row.RowRemovalOperation; - -public class RemoveRowsCommand extends EngineDependentCommand { - - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - return new RowRemovalOperation(engineConfig); - } -} diff --git a/main/src/com/google/gridworks/commands/row/ReorderRowsCommand.java b/main/src/com/google/gridworks/commands/row/ReorderRowsCommand.java deleted file mode 100644 index bd5b0663f..000000000 --- a/main/src/com/google/gridworks/commands/row/ReorderRowsCommand.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.google.gridworks.commands.row; - -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONException; -import org.json.JSONObject; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.commands.EngineDependentCommand; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.row.RowReorderOperation; -import com.google.gridworks.util.ParsingUtilities; - -public class ReorderRowsCommand extends EngineDependentCommand { - - @Override - protected AbstractOperation createOperation(Project project, - HttpServletRequest request, JSONObject engineConfig) throws Exception { - - String mode = request.getParameter("mode"); - JSONObject sorting = null; - - try{ - String json = request.getParameter("sorting"); - - sorting = (json == null) ? 
null : ParsingUtilities.evaluateJsonStringToObject(json); - } catch (JSONException e) { - // ignore - } - - return new RowReorderOperation(Engine.stringToMode(mode), sorting); - } -} diff --git a/main/src/com/google/gridworks/commands/workspace/GetAllProjectMetadataCommand.java b/main/src/com/google/gridworks/commands/workspace/GetAllProjectMetadataCommand.java deleted file mode 100644 index f3a89027d..000000000 --- a/main/src/com/google/gridworks/commands/workspace/GetAllProjectMetadataCommand.java +++ /dev/null @@ -1,48 +0,0 @@ -package com.google.gridworks.commands.workspace; - -import java.io.IOException; -import java.util.Map; -import java.util.Properties; -import java.util.Map.Entry; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.commands.Command; - -public class GetAllProjectMetadataCommand extends Command { - @Override - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - - try { - response.setCharacterEncoding("UTF-8"); - response.setHeader("Content-Type", "application/json"); - - JSONWriter writer = new JSONWriter(response.getWriter()); - Properties options = new Properties(); - - writer.object(); - writer.key("projects"); - writer.object(); - Map m = ProjectManager.singleton.getAllProjectMetadata(); - for (Entry e : m.entrySet()) { - ProjectMetadata pm = e.getValue(); - if (pm != null) { - writer.key(e.getKey().toString()); - e.getValue().write(writer, options); - } - } - writer.endObject(); - writer.endObject(); - } catch (JSONException e) { - respondException(response, e); - } - } -} \ No newline at end of file diff --git a/main/src/com/google/gridworks/exporters/CsvExporter.java 
b/main/src/com/google/gridworks/exporters/CsvExporter.java deleted file mode 100644 index 9fb1d0846..000000000 --- a/main/src/com/google/gridworks/exporters/CsvExporter.java +++ /dev/null @@ -1,115 +0,0 @@ -package com.google.gridworks.exporters; - -import java.io.IOException; -import java.io.OutputStream; -import java.io.Writer; -import java.util.Properties; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import au.com.bytecode.opencsv.CSVWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class CsvExporter implements Exporter{ - - final static Logger logger = LoggerFactory.getLogger("CsvExporter"); - char separator; - - public CsvExporter() { - separator = ','; //Comma separated-value is default - } - - public CsvExporter(char separator) { - this.separator = separator; - } - - @Override - public void export(Project project, Properties options, Engine engine, OutputStream outputStream) - throws IOException { - throw new RuntimeException("Not implemented"); - } - - @Override - public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException { - boolean printColumnHeader = true; - - if (options != null && options.getProperty("printColumnHeader") != null) { - printColumnHeader = Boolean.parseBoolean(options.getProperty("printColumnHeader")); - } - - RowVisitor visitor = new RowVisitor() { - CSVWriter csvWriter; - boolean printColumnHeader = true; - boolean isFirstRow = true; //the first row should also add the column headers - - public RowVisitor init(CSVWriter writer, boolean printColumnHeader) { - this.csvWriter = writer; - this.printColumnHeader = printColumnHeader; - return this; - } - - public boolean visit(Project project, int rowIndex, Row row) { - int size = 
project.columnModel.columns.size(); - - String[] cols = new String[size]; - String[] vals = new String[size]; - - int i = 0; - for (Column col : project.columnModel.columns) { - int cellIndex = col.getCellIndex(); - cols[i] = col.getName(); - - Object value = row.getCellValue(cellIndex); - if (value != null) { - vals[i] = value instanceof String ? (String) value : value.toString(); - } - i++; - } - - if (printColumnHeader && isFirstRow) { - csvWriter.writeNext(cols,false); - isFirstRow = false; //switch off flag - } - csvWriter.writeNext(vals,false); - - return false; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - try { - csvWriter.close(); - } catch (IOException e) { - logger.error("CsvExporter could not close writer : " + e.getMessage()); - } - } - - }.init(new CSVWriter(writer, separator), printColumnHeader); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, visitor); - } - - @Override - public String getContentType() { - return "application/x-unknown"; - } - - @Override - public boolean takeWriter() { - return true; - } - -} diff --git a/main/src/com/google/gridworks/exporters/Exporter.java b/main/src/com/google/gridworks/exporters/Exporter.java deleted file mode 100644 index f21174c84..000000000 --- a/main/src/com/google/gridworks/exporters/Exporter.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.google.gridworks.exporters; - -import java.io.IOException; -import java.io.OutputStream; -import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.model.Project; - -public interface Exporter { - public String getContentType(); - - public boolean takeWriter(); - - public void export(Project project, Properties options, Engine engine, OutputStream outputStream) throws IOException; - - public void export(Project project, Properties options, Engine engine, Writer writer) 
throws IOException; -} diff --git a/main/src/com/google/gridworks/exporters/ExporterRegistry.java b/main/src/com/google/gridworks/exporters/ExporterRegistry.java deleted file mode 100644 index ea5578b55..000000000 --- a/main/src/com/google/gridworks/exporters/ExporterRegistry.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.google.gridworks.exporters; - -import java.util.HashMap; -import java.util.Map; - -import com.google.gridworks.exporters.ProtographTransposeExporter.MqlwriteLikeExporter; -import com.google.gridworks.exporters.ProtographTransposeExporter.TripleLoaderExporter; - - -abstract public class ExporterRegistry { - static final private Map s_formatToExporter = new HashMap(); - - static { - s_formatToExporter.put("html", new HtmlTableExporter()); - s_formatToExporter.put("xls", new XlsExporter()); - s_formatToExporter.put("csv", new CsvExporter()); - - s_formatToExporter.put("template", new TemplatingExporter()); - - s_formatToExporter.put("tripleloader", new TripleLoaderExporter()); - s_formatToExporter.put("mqlwrite", new MqlwriteLikeExporter()); - } - - static public void registerExporter(String format, Exporter exporter) { - s_formatToExporter.put(format.toLowerCase(), exporter); - } - - static public Exporter getExporter(String format) { - return s_formatToExporter.get(format.toLowerCase()); - } -} diff --git a/main/src/com/google/gridworks/exporters/HtmlTableExporter.java b/main/src/com/google/gridworks/exporters/HtmlTableExporter.java deleted file mode 100644 index 43dca42ac..000000000 --- a/main/src/com/google/gridworks/exporters/HtmlTableExporter.java +++ /dev/null @@ -1,105 +0,0 @@ -package com.google.gridworks.exporters; - -import java.io.IOException; -import java.io.OutputStream; -import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import 
com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class HtmlTableExporter implements Exporter { - public String getContentType() { - return "text/html"; - } - - public boolean takeWriter() { - return true; - } - - public void export(Project project, Properties options, Engine engine, - OutputStream outputStream) throws IOException { - throw new RuntimeException("Not implemented"); - } - - public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException { - writer.write("\n"); - writer.write(""); - writer.write(ProjectManager.singleton.getProjectMetadata(project.id).getName()); - writer.write("\n"); - - writer.write("\n"); - writer.write("\n"); - - writer.write(""); - { - for (Column column : project.columnModel.columns) { - writer.write(""); - } - } - writer.write("\n"); - - { - RowVisitor visitor = new RowVisitor() { - Writer writer; - - public RowVisitor init(Writer writer) { - this.writer = writer; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - try { - writer.write(""); - - for (Column column : project.columnModel.columns) { - writer.write(""); - } - - writer.write("\n"); - } catch (IOException e) { - // ignore - } - return false; - } - }.init(writer); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, visitor); - } - - writer.write("
"); - writer.write(column.getName()); - writer.write("
"); - - int cellIndex = column.getCellIndex(); - if (cellIndex < row.cells.size()) { - Cell cell = row.cells.get(cellIndex); - if (cell != null && cell.value != null) { - Object v = cell.value; - writer.write(v instanceof String ? ((String) v) : v.toString()); - } - } - - writer.write("
\n"); - writer.write("\n"); - writer.write("\n"); - } - -} diff --git a/main/src/com/google/gridworks/exporters/ProtographTransposeExporter.java b/main/src/com/google/gridworks/exporters/ProtographTransposeExporter.java deleted file mode 100644 index 7319e465c..000000000 --- a/main/src/com/google/gridworks/exporters/ProtographTransposeExporter.java +++ /dev/null @@ -1,74 +0,0 @@ -package com.google.gridworks.exporters; - -import java.io.IOException; -import java.io.OutputStream; -import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.model.Project; -import com.google.gridworks.protograph.Protograph; -import com.google.gridworks.protograph.transpose.MqlwriteLikeTransposedNodeFactory; -import com.google.gridworks.protograph.transpose.TransposedNodeFactory; -import com.google.gridworks.protograph.transpose.Transposer; -import com.google.gridworks.protograph.transpose.TripleLoaderTransposedNodeFactory; - -abstract public class ProtographTransposeExporter implements Exporter { - final protected String _contentType; - - public ProtographTransposeExporter(String contentType) { - _contentType = contentType; - } - - public String getContentType() { - return "application/x-unknown"; - } - - public boolean takeWriter() { - return true; - } - - public void export(Project project, Properties options, Engine engine, - OutputStream outputStream) throws IOException { - throw new RuntimeException("Not implemented"); - } - - public void export(Project project, Properties options, Engine engine, - Writer writer) throws IOException { - - Protograph protograph = (Protograph) project.overlayModels.get("freebaseProtograph"); - if (protograph != null) { - TransposedNodeFactory nodeFactory = createNodeFactory(project, writer); - - Transposer.transpose(project, engine.getAllFilteredRows(), - protograph, protograph.getRootNode(0), nodeFactory, -1); - - nodeFactory.flush(); - } - } - - abstract protected 
TransposedNodeFactory createNodeFactory(Project project, Writer writer); - - static public class TripleLoaderExporter extends ProtographTransposeExporter { - public TripleLoaderExporter() { - super("application/x-unknown"); - } - - @Override - protected TransposedNodeFactory createNodeFactory(Project project, Writer writer) { - return new TripleLoaderTransposedNodeFactory(project, writer); - } - } - - static public class MqlwriteLikeExporter extends ProtographTransposeExporter { - public MqlwriteLikeExporter() { - super("application/x-unknown"); - } - - @Override - protected TransposedNodeFactory createNodeFactory(Project project, Writer writer) { - return new MqlwriteLikeTransposedNodeFactory(writer); - } - } - -} diff --git a/main/src/com/google/gridworks/exporters/TemplatingExporter.java b/main/src/com/google/gridworks/exporters/TemplatingExporter.java deleted file mode 100644 index 06a05f7c1..000000000 --- a/main/src/com/google/gridworks/exporters/TemplatingExporter.java +++ /dev/null @@ -1,125 +0,0 @@ -package com.google.gridworks.exporters; - -import java.io.IOException; -import java.io.OutputStream; -import java.io.StringWriter; -import java.io.Writer; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRecords; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RecordVisitor; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.browsing.Engine.Mode; -import com.google.gridworks.expr.ParsingException; -import com.google.gridworks.model.Project; -import com.google.gridworks.sorting.SortingRecordVisitor; -import com.google.gridworks.sorting.SortingRowVisitor; -import com.google.gridworks.templating.Parser; -import com.google.gridworks.templating.Template; -import com.google.gridworks.util.ParsingUtilities; - -public class 
TemplatingExporter implements Exporter { - public String getContentType() { - return "application/x-unknown"; - } - - public boolean takeWriter() { - return true; - } - - public void export(Project project, Properties options, Engine engine, - OutputStream outputStream) throws IOException { - throw new RuntimeException("Not implemented"); - } - - public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException { - String limitString = options.getProperty("limit"); - int limit = limitString != null ? Integer.parseInt(limitString) : -1; - - JSONObject sortingJson = null; - try{ - String json = options.getProperty("sorting"); - sortingJson = (json == null) ? null : - ParsingUtilities.evaluateJsonStringToObject(json); - } catch (JSONException e) { - } - - String templateString = options.getProperty("template"); - String prefixString = options.getProperty("prefix"); - String suffixString = options.getProperty("suffix"); - String separatorString = options.getProperty("separator"); - - Template template; - try { - template = Parser.parse(templateString); - } catch (ParsingException e) { - throw new IOException("Missing or bad template", e); - } - - template.setPrefix(prefixString); - template.setSuffix(suffixString); - template.setSeparator(separatorString); - - if (!"true".equals(options.getProperty("preview"))) { - StringWriter stringWriter = new StringWriter(); - JSONWriter jsonWriter = new JSONWriter(stringWriter); - try { - jsonWriter.object(); - jsonWriter.key("template"); jsonWriter.value(templateString); - jsonWriter.key("prefix"); jsonWriter.value(prefixString); - jsonWriter.key("suffix"); jsonWriter.value(suffixString); - jsonWriter.key("separator"); jsonWriter.value(separatorString); - jsonWriter.endObject(); - } catch (JSONException e) { - // ignore - } - - project.getMetadata().getPreferenceStore().put("exporters.templating.template", stringWriter.toString()); - } - - if (engine.getMode() == Mode.RowBased) { - 
FilteredRows filteredRows = engine.getAllFilteredRows(); - RowVisitor visitor = template.getRowVisitor(writer, limit); - - if (sortingJson != null) { - try { - SortingRowVisitor srv = new SortingRowVisitor(visitor); - srv.initializeFromJSON(project, sortingJson); - - if (srv.hasCriteria()) { - visitor = srv; - } - } catch (JSONException e) { - e.printStackTrace(); - } - } - - filteredRows.accept(project, visitor); - } else { - FilteredRecords filteredRecords = engine.getFilteredRecords(); - RecordVisitor visitor = template.getRecordVisitor(writer, limit); - - if (sortingJson != null) { - try { - SortingRecordVisitor srv = new SortingRecordVisitor(visitor); - srv.initializeFromJSON(project, sortingJson); - - if (srv.hasCriteria()) { - visitor = srv; - } - } catch (JSONException e) { - e.printStackTrace(); - } - } - - filteredRecords.accept(project, visitor); - } - } - -} diff --git a/main/src/com/google/gridworks/exporters/XlsExporter.java b/main/src/com/google/gridworks/exporters/XlsExporter.java deleted file mode 100644 index 17f62bb6f..000000000 --- a/main/src/com/google/gridworks/exporters/XlsExporter.java +++ /dev/null @@ -1,126 +0,0 @@ -package com.google.gridworks.exporters; - -import java.io.IOException; -import java.io.OutputStream; -import java.io.Writer; -import java.util.Calendar; -import java.util.Date; -import java.util.Properties; - -import org.apache.poi.hssf.usermodel.HSSFHyperlink; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Workbook; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class XlsExporter implements Exporter { - public String 
getContentType() { - return "application/xls"; - } - - public boolean takeWriter() { - return false; - } - - public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException { - throw new RuntimeException("Not implemented"); - } - - public void export(Project project, Properties options, Engine engine, - OutputStream outputStream) throws IOException { - - Workbook wb = new HSSFWorkbook(); - Sheet s = wb.createSheet(); - wb.setSheetName(0, ProjectManager.singleton.getProjectMetadata(project.id).getName()); - - int rowCount = 0; - - { - org.apache.poi.ss.usermodel.Row r = s.createRow(rowCount++); - - int cellCount = 0; - for (Column column : project.columnModel.columns) { - org.apache.poi.ss.usermodel.Cell c = r.createCell(cellCount++); - c.setCellValue(column.getName()); - } - } - - { - RowVisitor visitor = new RowVisitor() { - Sheet sheet; - int rowCount; - - public RowVisitor init(Sheet sheet, int rowCount) { - this.sheet = sheet; - this.rowCount = rowCount; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - org.apache.poi.ss.usermodel.Row r = sheet.createRow(rowCount++); - - int cellCount = 0; - for (Column column : project.columnModel.columns) { - org.apache.poi.ss.usermodel.Cell c = r.createCell(cellCount++); - - int cellIndex = column.getCellIndex(); - if (cellIndex < row.cells.size()) { - Cell cell = row.cells.get(cellIndex); - if (cell != null) { - if (cell.recon != null && cell.recon.match != null) { - c.setCellValue(cell.recon.match.name); - - HSSFHyperlink hl = new HSSFHyperlink(HSSFHyperlink.LINK_URL); - hl.setLabel(cell.recon.match.name); - hl.setAddress("http://www.freebase.com/view" + cell.recon.match.id); - - c.setHyperlink(hl); - } else if (cell.value != null) { - Object v = cell.value; - - if (v instanceof Number) { - 
c.setCellValue(((Number) v).doubleValue()); - } else if (v instanceof Boolean) { - c.setCellValue(((Boolean) v).booleanValue()); - } else if (v instanceof Date) { - c.setCellValue((Date) v); - } else if (v instanceof Calendar) { - c.setCellValue((Calendar) v); - } else if (v instanceof String) { - c.setCellValue((String) v); - } - } - } - } - } - return false; - } - }.init(s, rowCount); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, visitor); - } - - wb.write(outputStream); - outputStream.flush(); - } - -} diff --git a/main/src/com/google/gridworks/expr/Binder.java b/main/src/com/google/gridworks/expr/Binder.java deleted file mode 100644 index 28f82c0a7..000000000 --- a/main/src/com/google/gridworks/expr/Binder.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.google.gridworks.expr; - -import java.util.Properties; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public interface Binder { - public void initializeBindings(Properties bindings, Project project); - - public void bind(Properties bindings, Row row, int rowIndex, String columnName, Cell cell); -} diff --git a/main/src/com/google/gridworks/expr/CellTuple.java b/main/src/com/google/gridworks/expr/CellTuple.java deleted file mode 100644 index ab043f58f..000000000 --- a/main/src/com/google/gridworks/expr/CellTuple.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.google.gridworks.expr; - -import java.util.Properties; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class CellTuple implements HasFields { - final public Project project; - final public Row row; - - public CellTuple(Project project, Row row) { - this.project = project; - this.row = row; - } - - public Object getField(String name, Properties bindings) { - Column column = 
project.columnModel.getColumnByName(name); - if (column != null) { - int cellIndex = column.getCellIndex(); - Cell cell = row.getCell(cellIndex); - - if (cell != null) { - return new WrappedCell(project, name, cell); - } - } - return null; - } - - public boolean fieldAlsoHasFields(String name) { - return true; - } -} \ No newline at end of file diff --git a/main/src/com/google/gridworks/expr/EvalError.java b/main/src/com/google/gridworks/expr/EvalError.java deleted file mode 100644 index 1914d2abe..000000000 --- a/main/src/com/google/gridworks/expr/EvalError.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.google.gridworks.expr; - -import java.io.Serializable; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; - -/** - * An error that occurs during the evaluation of an Evaluable. Errors are values, too - * because they can be stored in cells just like strings, numbers, etc. Errors are not - * thrown because an error might occupy just one element in an array and doesn't need - * to make the whole array erroneous. 
- */ -public class EvalError implements Serializable, Jsonizable { - private static final long serialVersionUID = -102681220092874080L; - - final public String message; - - public EvalError(String message) { - this.message = message; - } - - public String toString() { - return this.message; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("type"); writer.value("error"); - writer.key("message"); writer.value(message); - writer.endObject(); - } - -} diff --git a/main/src/com/google/gridworks/expr/Evaluable.java b/main/src/com/google/gridworks/expr/Evaluable.java deleted file mode 100644 index 3b991d620..000000000 --- a/main/src/com/google/gridworks/expr/Evaluable.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.google.gridworks.expr; - -import java.util.Properties; - -/** - * Interface for evaluable expressions in any arbitrary language. - */ -public interface Evaluable { - /** - * Evaluate this expression in the given environment (bindings). 
- * - * @param bindings - * @return - */ - public Object evaluate(Properties bindings); -} diff --git a/main/src/com/google/gridworks/expr/ExpressionUtils.java b/main/src/com/google/gridworks/expr/ExpressionUtils.java deleted file mode 100644 index 95ca6b6da..000000000 --- a/main/src/com/google/gridworks/expr/ExpressionUtils.java +++ /dev/null @@ -1,146 +0,0 @@ -package com.google.gridworks.expr; - -import java.io.Serializable; -import java.util.Calendar; -import java.util.Collection; -import java.util.Date; -import java.util.HashSet; -import java.util.List; -import java.util.Properties; -import java.util.Set; - -import org.json.JSONArray; -import org.json.JSONObject; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class ExpressionUtils { - static protected Set s_binders = new HashSet(); - - static public void registerBinder(Binder binder) { - s_binders.add(binder); - } - - static public Properties createBindings(Project project) { - Properties bindings = new Properties(); - - bindings.put("true", true); - bindings.put("false", false); - - bindings.put("project", project); - - for (Binder binder : s_binders) { - binder.initializeBindings(bindings, project); - } - - return bindings; - } - - static public void bind(Properties bindings, Row row, int rowIndex, String columnName, Cell cell) { - Project project = (Project) bindings.get("project"); - - bindings.put("rowIndex", rowIndex); - bindings.put("row", new WrappedRow(project, rowIndex, row)); - bindings.put("cells", new CellTuple(project, row)); - - if (columnName != null) { - bindings.put("columnName", columnName); - } - - if (cell == null) { - bindings.remove("cell"); - bindings.remove("value"); - } else { - bindings.put("cell", new WrappedCell(project, columnName, cell)); - if (cell.value == null) { - bindings.remove("value"); - } else { - bindings.put("value", cell.value); - } - } - - for (Binder binder : s_binders) { - 
binder.bind(bindings, row, rowIndex, columnName, cell); - } - } - - static public boolean isError(Object o) { - return o != null && o instanceof EvalError; - } - /* - static public boolean isBlank(Object o) { - return o == null || (o instanceof String && ((String) o).length() == 0); - } - */ - static public boolean isNonBlankData(Object o) { - return - o != null && - !(o instanceof EvalError) && - (!(o instanceof String) || ((String) o).length() > 0); - } - - static public boolean isTrue(Object o) { - return o != null && - (o instanceof Boolean ? - ((Boolean) o).booleanValue() : - Boolean.parseBoolean(o.toString())); - } - - static public boolean sameValue(Object v1, Object v2) { - if (v1 == null) { - return (v2 == null) - || (v2 instanceof String && ((String) v2).length() == 0); - } else if (v2 == null) { - return (v1 == null) - || (v1 instanceof String && ((String) v1).length() == 0); - } else { - return v1.equals(v2); - } - } - - static public boolean isStorable(Object v) { - return v == null || - v instanceof Number || - v instanceof String || - v instanceof Boolean || - v instanceof Date || - v instanceof Calendar || - v instanceof EvalError; - } - - static public Serializable wrapStorable(Object v) { - if (v instanceof JSONArray) { - return ((JSONArray) v).toString(); - } else if (v instanceof JSONObject) { - return ((JSONObject) v).toString(); - } else { - return isStorable(v) ? 
- (Serializable) v : - new EvalError(v.getClass().getSimpleName() + " value not storable"); - } - } - - static public boolean isArray(Object v) { - return v != null && v.getClass().isArray(); - } - - static public boolean isArrayOrCollection(Object v) { - return v != null && (v.getClass().isArray() || v instanceof Collection); - } - - static public boolean isArrayOrList(Object v) { - return v != null && (v.getClass().isArray() || v instanceof List); - } - - @SuppressWarnings("unchecked") - static public List toObjectList(Object v) { - return (List) v; - } - - @SuppressWarnings("unchecked") - static public Collection toObjectCollection(Object v) { - return (Collection) v; - } -} diff --git a/main/src/com/google/gridworks/expr/HasFields.java b/main/src/com/google/gridworks/expr/HasFields.java deleted file mode 100644 index 8d1c11cc3..000000000 --- a/main/src/com/google/gridworks/expr/HasFields.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.google.gridworks.expr; - -import java.util.Properties; - -/** - * Interface for objects that have named fields, which can be retrieved using the - * dot notation or the bracket notation, e.g., cells.Country, cells["Type of Disaster"]. - */ -public interface HasFields { - public Object getField(String name, Properties bindings); - - public boolean fieldAlsoHasFields(String name); -} diff --git a/main/src/com/google/gridworks/expr/HasFieldsList.java b/main/src/com/google/gridworks/expr/HasFieldsList.java deleted file mode 100644 index 8477cad51..000000000 --- a/main/src/com/google/gridworks/expr/HasFieldsList.java +++ /dev/null @@ -1,14 +0,0 @@ -package com.google.gridworks.expr; - -/** - * Interface for objects each of which is a list of HasFields objects of the - * same kind (e.g., list of cells). Its getField method thus returns either - * another HasFieldsList object or an array or java.util.List of objects. 
- */ -public interface HasFieldsList extends HasFields { - public int length(); - - public HasFields get(int index); - - public HasFieldsList getSubList(int from, int to); -} diff --git a/main/src/com/google/gridworks/expr/HasFieldsListImpl.java b/main/src/com/google/gridworks/expr/HasFieldsListImpl.java deleted file mode 100644 index dcea0b485..000000000 --- a/main/src/com/google/gridworks/expr/HasFieldsListImpl.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.google.gridworks.expr; - -import java.util.ArrayList; -import java.util.Properties; - -public class HasFieldsListImpl extends ArrayList implements HasFieldsList { - private static final long serialVersionUID = -8635194387420305802L; - - public Object getField(String name, Properties bindings) { - int c = size(); - if (c > 0 && get(0).fieldAlsoHasFields(name)) { - HasFieldsListImpl l = new HasFieldsListImpl(); - for (int i = 0; i < size(); i++) { - l.add(i, (HasFields) this.get(i).getField(name, bindings)); - } - return l; - } else { - Object[] r = new Object[this.size()]; - for (int i = 0; i < r.length; i++) { - r[i] = this.get(i).getField(name, bindings); - } - return r; - } - } - - public int length() { - return size(); - } - - public boolean fieldAlsoHasFields(String name) { - int c = size(); - return (c > 0 && get(0).fieldAlsoHasFields(name)); - } - - public HasFieldsList getSubList(int fromIndex, int toIndex) { - HasFieldsListImpl subList = new HasFieldsListImpl(); - subList.addAll(this.subList(fromIndex, toIndex)); - - return subList; - } -} diff --git a/main/src/com/google/gridworks/expr/LanguageSpecificParser.java b/main/src/com/google/gridworks/expr/LanguageSpecificParser.java deleted file mode 100644 index 4a4cda18e..000000000 --- a/main/src/com/google/gridworks/expr/LanguageSpecificParser.java +++ /dev/null @@ -1,5 +0,0 @@ -package com.google.gridworks.expr; - -public interface LanguageSpecificParser { - public Evaluable parse(String s) throws ParsingException; -} diff --git 
a/main/src/com/google/gridworks/expr/MetaParser.java b/main/src/com/google/gridworks/expr/MetaParser.java deleted file mode 100644 index 06ad70b84..000000000 --- a/main/src/com/google/gridworks/expr/MetaParser.java +++ /dev/null @@ -1,118 +0,0 @@ -package com.google.gridworks.expr; - -import java.io.StringReader; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; -import java.util.Set; - -import clojure.lang.IFn; - -import com.google.gridworks.gel.Parser; - -abstract public class MetaParser { - static public class LanguageInfo { - final public String name; - final public LanguageSpecificParser parser; - final public String defaultExpression; - - LanguageInfo(String name, LanguageSpecificParser parser, String defaultExpression) { - this.name = name; - this.parser = parser; - this.defaultExpression = defaultExpression; - } - } - - static protected Map s_languages; - static { - s_languages = new HashMap(); - - registerLanguageParser("gel", "Gridworks Expression Language (GEL)", new LanguageSpecificParser() { - - @Override - public Evaluable parse(String s) throws ParsingException { - return parseGEL(s); - } - }, "value"); - - registerLanguageParser("clojure", "Clojure", new LanguageSpecificParser() { - - @Override - public Evaluable parse(String s) throws ParsingException { - try { - IFn fn = (IFn) clojure.lang.Compiler.load(new StringReader( - "(fn [value cell cells row rowIndex] " + s + ")" - )); - - return new Evaluable() { - private IFn _fn; - - public Evaluable init(IFn fn) { - _fn = fn; - return this; - } - - public Object evaluate(Properties bindings) { - try { - return _fn.invoke( - bindings.get("value"), - bindings.get("cell"), - bindings.get("cells"), - bindings.get("row"), - bindings.get("rowIndex") - ); - } catch (Exception e) { - return new EvalError(e.getMessage()); - } - } - }.init(fn); - } catch (Exception e) { - throw new ParsingException(e.getMessage()); - } - } - }, "value"); - } - - static public void 
registerLanguageParser(String languagePrefix, String name, LanguageSpecificParser parser, String defaultExpression) { - s_languages.put(languagePrefix, new LanguageInfo(name, parser, defaultExpression)); - } - - static public LanguageInfo getLanguageInfo(String languagePrefix) { - return s_languages.get(languagePrefix.toLowerCase()); - } - - static public Set getLanguagePrefixes() { - return s_languages.keySet(); - } - - /** - * Parse an expression that might have a language prefix into an Evaluable. - * Expressions without valid prefixes or without any prefix are assumed to be - * GEL expressions. - * - * @param s - * @return - * @throws ParsingException - */ - static public Evaluable parse(String s) throws ParsingException { - String language = "gel"; - - int colon = s.indexOf(':'); - if (colon >= 0) { - language = s.substring(0, colon); - } - - LanguageInfo info = s_languages.get(language.toLowerCase()); - if (info != null) { - return info.parser.parse(s.substring(colon + 1)); - } else { - return parseGEL(s); - } - } - - static protected Evaluable parseGEL(String s) throws ParsingException { - Parser parser = new Parser(s); - - return parser.getExpression(); - } -} diff --git a/main/src/com/google/gridworks/expr/ParsingException.java b/main/src/com/google/gridworks/expr/ParsingException.java deleted file mode 100644 index adf2c6e97..000000000 --- a/main/src/com/google/gridworks/expr/ParsingException.java +++ /dev/null @@ -1,16 +0,0 @@ -/** - * - */ -package com.google.gridworks.expr; - -public class ParsingException extends Exception { - private static final long serialVersionUID = 155004505172098755L; - - public ParsingException(String message) { - super(message); - } - - public ParsingException(String message, Throwable cause) { - super(message, cause); - } -} \ No newline at end of file diff --git a/main/src/com/google/gridworks/expr/WrappedCell.java b/main/src/com/google/gridworks/expr/WrappedCell.java deleted file mode 100644 index da0386ef6..000000000 --- 
a/main/src/com/google/gridworks/expr/WrappedCell.java +++ /dev/null @@ -1,26 +0,0 @@ -package com.google.gridworks.expr; - -import java.util.Properties; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; - -public class WrappedCell implements HasFields { - final public Project project; - final public String columnName; - final public Cell cell; - - public WrappedCell(Project project, String columnName, Cell cell) { - this.project = project; - this.columnName = columnName; - this.cell = cell; - } - - public Object getField(String name, Properties bindings) { - return cell.getField(name, bindings); - } - - public boolean fieldAlsoHasFields(String name) { - return cell.fieldAlsoHasFields(name); - } -} diff --git a/main/src/com/google/gridworks/expr/WrappedRow.java b/main/src/com/google/gridworks/expr/WrappedRow.java deleted file mode 100644 index 1041a6b2b..000000000 --- a/main/src/com/google/gridworks/expr/WrappedRow.java +++ /dev/null @@ -1,93 +0,0 @@ -package com.google.gridworks.expr; - -import java.util.Properties; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.model.Row; - -public class WrappedRow implements HasFields { - final public Project project; - final public int rowIndex; - final public Row row; - - public WrappedRow(Project project, int rowIndex, Row row) { - this.project = project; - this.rowIndex = rowIndex; - this.row = row; - } - - public Object getField(String name, Properties bindings) { - if ("cells".equals(name)) { - return new CellTuple(project, row); - } else if ("index".equals(name)) { - return rowIndex; - } else if ("record".equals(name)) { - int rowIndex = (Integer) bindings.get("rowIndex"); - - return new WrappedRecord(project.recordModel.getRecordOfRow(rowIndex)); - } else if ("columnNames".equals(name)) { - Project project = (Project) 
bindings.get("project"); - - return project.columnModel.getColumnNames(); - } else { - return row.getField(name, bindings); - } - } - - public boolean fieldAlsoHasFields(String name) { - return row.fieldAlsoHasFields(name); - } - - protected class WrappedRecord implements HasFields { - final Record _record; - - protected WrappedRecord(Record record) { - _record = record; - } - - public Object getField(String name, Properties bindings) { - if ("cells".equals(name)) { - return new RecordCells(_record); - } - return null; - } - - public boolean fieldAlsoHasFields(String name) { - return "cells".equals(name); - } - } - - protected class RecordCells implements HasFields { - final Record _record; - - protected RecordCells(Record record) { - _record = record; - } - - public Object getField(String name, Properties bindings) { - Column column = project.columnModel.getColumnByName(name); - if (column != null) { - int cellIndex = column.getCellIndex(); - - HasFieldsListImpl cells = new HasFieldsListImpl(); - for (int r = _record.fromRowIndex; r < _record.toRowIndex; r++) { - Row row = project.rows.get(r); - Cell cell = row.getCell(cellIndex); - if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) { - cells.add(new WrappedCell(project, name, cell)); - } - } - - return cells; - } - return null; - } - - public boolean fieldAlsoHasFields(String name) { - return true; - } - } -} diff --git a/main/src/com/google/gridworks/expr/functions/Cross.java b/main/src/com/google/gridworks/expr/functions/Cross.java deleted file mode 100644 index 67b2025ec..000000000 --- a/main/src/com/google/gridworks/expr/functions/Cross.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.google.gridworks.expr.functions; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.InterProjectModel.ProjectJoin; -import com.google.gridworks.expr.EvalError; -import 
com.google.gridworks.expr.WrappedCell; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; -import com.google.gridworks.model.Project; - -public class Cross implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 3) { - // from project is implied - - Object wrappedCell = args[0]; // from cell - Object toProjectName = args[1]; - Object toColumnName = args[2]; - - if (wrappedCell != null && wrappedCell instanceof WrappedCell && - toProjectName != null && toProjectName instanceof String && - toColumnName != null && toColumnName instanceof String) { - - ProjectJoin join = ProjectManager.singleton.getInterProjectModel().getJoin( - ProjectManager.singleton.getProjectMetadata( - ((Project) bindings.get("project")).id).getName(), - ((WrappedCell) wrappedCell).columnName, - (String) toProjectName, - (String) toColumnName - ); - - return join.getRows(((WrappedCell) wrappedCell).cell.value); - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a cell, a project name to join with, and a column name in that project"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("TODO"); - writer.key("params"); writer.value("cell c, string projectName, string columnName"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/FacetCount.java b/main/src/com/google/gridworks/expr/functions/FacetCount.java deleted file mode 100644 index cd545dedb..000000000 --- a/main/src/com/google/gridworks/expr/functions/FacetCount.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.google.gridworks.expr.functions; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import 
com.google.gridworks.browsing.util.ExpressionNominalValueGrouper; -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.ParsingException; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; - -public class FacetCount implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 3 && args[1] instanceof String && args[2] instanceof String) { - Object choiceValue = args[0]; // choice value to look up - String facetExpression = (String) args[1]; - String columnName = (String) args[2]; - - Project project = (Project) bindings.get("project"); - Column column = project.columnModel.getColumnByName(columnName); - if (column == null) { - return new EvalError("No such column named " + columnName); - } - - String key = "nominal-bin:" + facetExpression; - ExpressionNominalValueGrouper grouper = (ExpressionNominalValueGrouper) column.getPrecompute(key); - if (grouper == null) { - try { - Evaluable eval = MetaParser.parse(facetExpression); - Engine engine = new Engine(project); - - grouper = new ExpressionNominalValueGrouper(eval, columnName, column.getCellIndex()); - engine.getAllRows().accept(project, grouper); - - column.setPrecompute(key, grouper); - } catch (ParsingException e) { - return new EvalError("Error parsing facet expression " + facetExpression); - } - } - - return grouper.getChoiceValueCountMultiple(choiceValue); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + - " expects a choice value, an expression as a string, and a column name"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the facet count corresponding to the given choice 
value"); - writer.key("params"); writer.value("choiceValue, string facetExpression, string columnName"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/Get.java b/main/src/com/google/gridworks/expr/functions/Get.java deleted file mode 100644 index fd358186d..000000000 --- a/main/src/com/google/gridworks/expr/functions/Get.java +++ /dev/null @@ -1,147 +0,0 @@ -package com.google.gridworks.expr.functions; - -import java.util.List; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.expr.HasFields; -import com.google.gridworks.expr.HasFieldsList; -import com.google.gridworks.gel.Function; - -public class Get implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length > 1 && args.length <= 3) { - Object v = args[0]; - Object from = args[1]; - Object to = (args.length == 3) ? 
args[2] : null; - - if (v != null && from != null) { - if (v instanceof HasFields && from instanceof String) { - return ((HasFields) v).getField((String) from, bindings); - } else if (v instanceof JSONObject && from instanceof String) { - try { - return ((JSONObject) v).get((String) from); - } catch (JSONException e) { - // ignore; will return null - } - } else { - if (from instanceof Number && (to == null || to instanceof Number)) { - if (v.getClass().isArray() || - v instanceof List || - v instanceof HasFieldsList || - v instanceof JSONArray) { - - int length = 0; - if (v.getClass().isArray()) { - length = ((Object[]) v).length; - } else if (v instanceof HasFieldsList) { - length = ((HasFieldsList) v).length(); - } else if (v instanceof JSONArray) { - length = ((JSONArray) v).length(); - } else { - length = ExpressionUtils.toObjectList(v).size(); - } - - int start = ((Number) from).intValue(); - if (start < 0) { - start = length + start; - } - start = Math.min(length, Math.max(0, start)); - - if (to == null) { - if (v.getClass().isArray()) { - return ((Object[]) v)[start]; - } else if (v instanceof HasFieldsList) { - return ((HasFieldsList) v).get(start); - } else if (v instanceof JSONArray) { - try { - return ((JSONArray) v).get(start); - } catch (JSONException e) { - // ignore; will return null - } - } else { - return ExpressionUtils.toObjectList(v).get(start); - } - } else { - int end = (to != null) ? 
((Number) to).intValue() : length; - - if (end < 0) { - end = length + end; - } - end = Math.min(length, Math.max(start, end)); - - if (end > start) { - if (v.getClass().isArray()) { - Object[] a2 = new Object[end - start]; - - System.arraycopy((Object[]) v, start, a2, 0, end - start); - - return a2; - } else if (v instanceof HasFieldsList) { - return ((HasFieldsList) v).getSubList(start, end); - } else if (v instanceof JSONArray) { - JSONArray a = (JSONArray) v; - Object[] a2 = new Object[end - start]; - - for (int i = 0; i < a2.length; i++) { - try { - a2[i] = a.get(start + i); - } catch (JSONException e) { - // ignore - } - } - - return a2; - } else { - return ExpressionUtils.toObjectList(v).subList(start, end); - } - } - } - } else { - String s = (v instanceof String) ? (String) v : v.toString(); - - int start = ((Number) from).intValue(); - if (start < 0) { - start = s.length() + start; - } - start = Math.min(s.length(), Math.max(0, start)); - - if (to != null) { - int end = ((Number) to).intValue(); - if (end < 0) { - end = s.length() + end; - } - end = Math.min(s.length(), Math.max(start, end)); - - return s.substring(start, end); - } else { - return s.substring(start, start + 1); - } - } - } - } - } - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value( - "If o has fields, returns the field named 'from' of o. " + - "If o is an array, returns o[from, to]. 
" + - "if o is a string, returns o.substring(from, to)" - ); - writer.key("params"); writer.value("o, number or string from, optional number to"); - writer.key("returns"); writer.value("Depends on actual arguments"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/HasField.java b/main/src/com/google/gridworks/expr/functions/HasField.java deleted file mode 100644 index 0741de3de..000000000 --- a/main/src/com/google/gridworks/expr/functions/HasField.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.google.gridworks.expr.functions; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.expr.HasFields; -import com.google.gridworks.gel.Function; - -public class HasField implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length > 1 && args.length <= 2) { - Object v = args[0]; - Object f = args[1]; - - if (v != null && f != null && f instanceof String) { - String name = (String) f; - if (v instanceof HasFields) { - return ((HasFields) v).getField(name, bindings) != null; - } else if (v instanceof JSONObject) { - try { - return ((JSONObject) v).get(name) != null; - } catch (JSONException e) { - // ignore; will return false - } - } - } - } - return false; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns whether o has field name"); - writer.key("params"); writer.value("o, string name"); - writer.key("returns"); writer.value("boolean"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/Jsonize.java b/main/src/com/google/gridworks/expr/functions/Jsonize.java deleted file mode 100644 index a6a0392cc..000000000 --- a/main/src/com/google/gridworks/expr/functions/Jsonize.java +++ /dev/null @@ -1,56 +0,0 @@ -package com.google.gridworks.expr.functions; - 
-import java.util.Collection; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class Jsonize implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length >= 1) { - try { - Object o1 = args[0]; - if (o1 == null) { - return "null"; - } else if (o1 instanceof Number) { - return JSONObject.numberToString((Number) o1); - } else if (o1 instanceof Boolean) { - return o1.toString(); - } else if (o1 instanceof JSONObject) { - return ((JSONObject) o1).toString(); - } else if (o1 instanceof JSONArray) { - return ((JSONArray) o1).toString(); - } else if (o1 instanceof Map) { - return new JSONObject((Map) o1).toString(); - } else if (o1 instanceof Collection) { - return new JSONArray((Collection) o1).toString(); - } else if (o1.getClass().isArray()) { - return new JSONArray(o1).toString(); - } else { - return JSONObject.quote(o1.toString()); - } - } catch (JSONException e) { - throw new RuntimeException(e); - } - } - return null; - } - - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Quotes a value as a JSON literal value"); - writer.key("params"); writer.value("value"); - writer.key("returns"); writer.value("JSON literal value"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/Length.java b/main/src/com/google/gridworks/expr/functions/Length.java deleted file mode 100644 index 124ccc404..000000000 --- a/main/src/com/google/gridworks/expr/functions/Length.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.google.gridworks.expr.functions; - -import java.util.Collection; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONWriter; - -import 
com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.HasFieldsList; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Length implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1) { - Object v = args[0]; - - if (v != null) { - if (v.getClass().isArray()) { - Object[] a = (Object[]) v; - return a.length; - } else if (v instanceof Collection) { - return ((Collection) v).size(); - } else if (v instanceof HasFieldsList) { - return ((HasFieldsList) v).length(); - } else if (v instanceof JSONArray) { - return ((JSONArray) v).length(); - } else { - String s = (v instanceof String ? (String) v : v.toString()); - return s.length(); - } - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array or a string"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the length of o"); - writer.key("params"); writer.value("array or string o"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/Slice.java b/main/src/com/google/gridworks/expr/functions/Slice.java deleted file mode 100644 index b097f1c58..000000000 --- a/main/src/com/google/gridworks/expr/functions/Slice.java +++ /dev/null @@ -1,110 +0,0 @@ -package com.google.gridworks.expr.functions; - -import java.util.List; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.expr.HasFieldsList; -import com.google.gridworks.gel.Function; - -public class Slice implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length > 1 && args.length <= 3) { - Object v = args[0]; - Object from 
= args[1]; - Object to = (args.length == 3) ? args[2] : null; - - if (v != null && from != null && from instanceof Number && (to == null || to instanceof Number)) { - if (v.getClass().isArray() || v instanceof List || v instanceof HasFieldsList || v instanceof JSONArray) { - int length = 0; - if (v.getClass().isArray()) { - length = ((Object[]) v).length; - } else if (v instanceof HasFieldsList) { - length = ((HasFieldsList) v).length(); - } else if (v instanceof JSONArray) { - length = ((JSONArray) v).length(); - } else { - length = ExpressionUtils.toObjectList(v).size(); - } - - int start = ((Number) from).intValue(); - int end = (to != null) ? ((Number) to).intValue() : length; - - if (start < 0) { - start = length + start; - } - start = Math.min(length, Math.max(0, start)); - - if (end < 0) { - end = length + end; - } - end = Math.min(length, Math.max(start, end)); - - if (v.getClass().isArray()) { - Object[] a2 = new Object[end - start]; - - System.arraycopy((Object[]) v, start, a2, 0, end - start); - - return a2; - } else if (v instanceof HasFieldsList) { - return ((HasFieldsList) v).getSubList(start, end); - } else if (v instanceof JSONArray) { - JSONArray a = (JSONArray) v; - Object[] a2 = new Object[end - start]; - - for (int i = 0; i < a2.length; i++) { - try { - a2[i] = a.get(start + i); - } catch (JSONException e) { - // ignore - } - } - - return a2; - } else { - return ExpressionUtils.toObjectList(v).subList(start, end); - } - } else { - String s = (v instanceof String) ? 
(String) v : v.toString(); - - int start = ((Number) from).intValue(); - if (start < 0) { - start = s.length() + start; - } - start = Math.min(s.length(), Math.max(0, start)); - - if (to != null) { - int end = ((Number) to).intValue(); - if (end < 0) { - end = s.length() + end; - } - end = Math.min(s.length(), Math.max(start, end)); - - return s.substring(start, end); - } else { - return s.substring(start); - } - } - } - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value( - "If o is an array, returns o[from, to]. " + - "if o is a string, returns o.substring(from, to)" - ); - writer.key("params"); writer.value("o, number from, optional number to"); - writer.key("returns"); writer.value("Depends on actual arguments"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/ToDate.java b/main/src/com/google/gridworks/expr/functions/ToDate.java deleted file mode 100644 index 44528df05..000000000 --- a/main/src/com/google/gridworks/expr/functions/ToDate.java +++ /dev/null @@ -1,77 +0,0 @@ -package com.google.gridworks.expr.functions; - -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.GregorianCalendar; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.util.CalendarParser; -import com.google.gridworks.expr.util.CalendarParserException; -import com.google.gridworks.gel.Function; - -public class ToDate implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 0) { - // missing value, can this happen? 
- return null; - } - if (!(args[0] instanceof String)) { - // ignore cell values that aren't strings - return null; - } - String o1 = (String) args[0]; - - // "o, boolean month_first (optional)" - if (args.length == 1 || (args.length == 2 && args[1] instanceof Boolean)) { - boolean month_first = true; - if (args.length == 2) { - month_first = (Boolean) args[1]; - } - try { - return CalendarParser.parse( o1, (month_first) ? CalendarParser.MM_DD_YY : CalendarParser.DD_MM_YY); - } catch (CalendarParserException e) { - // do something about - } - } - - // "o, format1, format2 (optional), ..." - if (args.length>=2) { - for (int i=1;i= 1) { - Object o1 = args[0]; - if (o1 != null) { - if (o1 instanceof Calendar) { - if (args.length == 2) { - Object o2 = args[1]; - if (o2 != null && o2 instanceof String) { - SimpleDateFormat formatter = new SimpleDateFormat((String) o2); - return formatter.format(((Calendar) o1).getTime()); - } - } - } else { - return (o1 instanceof String) ? o1 : o1.toString(); - } - } - } - return null; - } - - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns o converted to a string"); - writer.key("params"); writer.value("o, string format (optional)"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/Type.java b/main/src/com/google/gridworks/expr/functions/Type.java deleted file mode 100644 index 811c0d35e..000000000 --- a/main/src/com/google/gridworks/expr/functions/Type.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.google.gridworks.expr.functions; - -import java.util.Calendar; -import java.util.Date; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import 
com.google.gridworks.gel.Function; - -public class Type implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1) { - Object v = args[0]; - - if (v != null) { - if (v instanceof String) { - return "string"; - } else if (v instanceof Calendar || v instanceof Date) { - return "date"; - } else if (v instanceof Number) { - return "number"; - } else if (v.getClass().isArray() || v instanceof List) { - return "array"; - } else if (v instanceof EvalError) { - return "error"; - } else { - return v.getClass().getName(); - } - } - return "undefined"; - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects one argument"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the type of o"); - writer.key("params"); writer.value("object o"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/arrays/ArgsToArray.java b/main/src/com/google/gridworks/expr/functions/arrays/ArgsToArray.java deleted file mode 100644 index 27592f248..000000000 --- a/main/src/com/google/gridworks/expr/functions/arrays/ArgsToArray.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.google.gridworks.expr.functions.arrays; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class ArgsToArray implements Function { - - public Object call(Properties bindings, Object[] args) { - return args; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns all arguments passed to it as an array"); - writer.key("params"); writer.value("a1, a2, ..."); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git 
a/main/src/com/google/gridworks/expr/functions/arrays/Join.java b/main/src/com/google/gridworks/expr/functions/arrays/Join.java deleted file mode 100644 index 81c6db10c..000000000 --- a/main/src/com/google/gridworks/expr/functions/arrays/Join.java +++ /dev/null @@ -1,78 +0,0 @@ -package com.google.gridworks.expr.functions.arrays; - -import java.util.List; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Join implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object v = args[0]; - Object s = args[1]; - - if (v != null && s != null && s instanceof String) { - String separator = (String) s; - - if (v.getClass().isArray() || v instanceof List || v instanceof JSONArray) { - StringBuffer sb = new StringBuffer(); - if (v.getClass().isArray()) { - for (Object o : (Object[]) v) { - if (o != null) { - if (sb.length() > 0) { - sb.append(separator); - } - sb.append(o.toString()); - } - } - } else if (v instanceof JSONArray) { - JSONArray a = (JSONArray) v; - int l = a.length(); - - for (int i = 0; i < l; i++) { - if (sb.length() > 0) { - sb.append(separator); - } - try { - sb.append(a.get(i).toString()); - } catch (JSONException e) { - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + - " cannot retrieve element " + i + " of array"); - } - } - } else { - for (Object o : ExpressionUtils.toObjectList(v)) { - if (o != null) { - if (sb.length() > 0) { - sb.append(separator); - } - sb.append(o.toString()); - } - } - } - - return sb.toString(); - } - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array and a string"); - } - - public void write(JSONWriter writer, Properties options) 
- throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the string obtained by joining the array a with the separator sep"); - writer.key("params"); writer.value("array a, string sep"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/arrays/Reverse.java b/main/src/com/google/gridworks/expr/functions/arrays/Reverse.java deleted file mode 100644 index 48cc991a3..000000000 --- a/main/src/com/google/gridworks/expr/functions/arrays/Reverse.java +++ /dev/null @@ -1,65 +0,0 @@ -package com.google.gridworks.expr.functions.arrays; - -import java.util.List; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; -import com.google.gridworks.util.JSONUtilities; - -public class Reverse implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1) { - Object v = args[0]; - - if (v != null) { - if (v instanceof JSONArray) { - try { - v = JSONUtilities.toArray((JSONArray) v); - } catch (JSONException e) { - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + - " fails to process a JSON array: " + e.getMessage()); - } - } - - if (v.getClass().isArray() || v instanceof List) { - int length = v.getClass().isArray() ? 
- ((Object[]) v).length : - ExpressionUtils.toObjectList(v).size(); - - Object[] r = new Object[length]; - if (v.getClass().isArray()) { - Object[] a = (Object[]) v; - for (int i = 0; i < length; i++) { - r[i] = a[r.length - i - 1]; - } - } else { - List a = ExpressionUtils.toObjectList(v); - for (int i = 0; i < length; i++) { - r[i] = a.get(r.length - i - 1); - } - } - return r; - } - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Reverses array a"); - writer.key("params"); writer.value("array a"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/arrays/Sort.java b/main/src/com/google/gridworks/expr/functions/arrays/Sort.java deleted file mode 100644 index d590ad706..000000000 --- a/main/src/com/google/gridworks/expr/functions/arrays/Sort.java +++ /dev/null @@ -1,63 +0,0 @@ -package com.google.gridworks.expr.functions.arrays; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; -import com.google.gridworks.util.JSONUtilities; - -public class Sort implements Function { - - @SuppressWarnings("unchecked") - public Object call(Properties bindings, Object[] args) { - if (args.length == 1) { - Object v = args[0]; - - if (v != null) { - if (v.getClass().isArray()) { - Object[] a = (Object[]) v; - Object[] r = a.clone(); - - Arrays.sort(r, 0, r.length); - - return r; - } else if (v instanceof JSONArray) { - try { - Object[] r = JSONUtilities.toArray((JSONArray) v); - - Arrays.sort(r, 0, 
r.length); - - return r; - } catch (JSONException e) { - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + - " fails to process a JSON array: " + e.getMessage()); - } - } else if (v instanceof List) { - List> a = (List>) v; - Collections.sort(a); - - return a; - } - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Sorts array a"); - writer.key("params"); writer.value("array a"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/arrays/Uniques.java b/main/src/com/google/gridworks/expr/functions/arrays/Uniques.java deleted file mode 100644 index cc14fb2b9..000000000 --- a/main/src/com/google/gridworks/expr/functions/arrays/Uniques.java +++ /dev/null @@ -1,63 +0,0 @@ -package com.google.gridworks.expr.functions.arrays; - -import java.util.HashSet; -import java.util.List; -import java.util.Properties; -import java.util.Set; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; -import com.google.gridworks.util.JSONUtilities; - -public class Uniques implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1) { - Object v = args[0]; - - if (v != null) { - if (v instanceof JSONArray) { - try { - v = JSONUtilities.toArray((JSONArray) v); - } catch (JSONException e) { - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + - " fails to process a JSON array: " + e.getMessage()); - } - } - - if (v.getClass().isArray() || v instanceof List) { - Set set = null; - - if 
(v.getClass().isArray()) { - Object[] a = (Object[]) v; - - set = new HashSet(a.length); - for (int i = 0; i < a.length; i++) { - set.add(a[i]); - } - } else { - set = new HashSet(ExpressionUtils.toObjectList(v)); - } - return set.toArray(); - } - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns array a with duplicates removed"); - writer.key("params"); writer.value("array a"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/booleans/And.java b/main/src/com/google/gridworks/expr/functions/booleans/And.java deleted file mode 100644 index 1265e42a4..000000000 --- a/main/src/com/google/gridworks/expr/functions/booleans/And.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.google.gridworks.expr.functions.booleans; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class And implements Function { - - public Object call(Properties bindings, Object[] args) { - for (Object o : args) { - if (!Not.objectToBoolean(o)) { - return false; - } - } - return true; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("ANDs two boolean values"); - writer.key("params"); writer.value("boolean a, boolean b"); - writer.key("returns"); writer.value("boolean"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/booleans/Not.java b/main/src/com/google/gridworks/expr/functions/booleans/Not.java deleted file mode 100644 index d35ac9f8f..000000000 --- a/main/src/com/google/gridworks/expr/functions/booleans/Not.java +++ /dev/null @@ -1,35 +0,0 @@ -package 
com.google.gridworks.expr.functions.booleans; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Not implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1) { - return !objectToBoolean(args[0]); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a boolean"); - } - - public static boolean objectToBoolean(Object o) { - return o == null ? false : ( - (o instanceof Boolean) ? ((Boolean) o).booleanValue() : Boolean.parseBoolean(o.toString())); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the opposite of b"); - writer.key("params"); writer.value("boolean b"); - writer.key("returns"); writer.value("boolean"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/booleans/Or.java b/main/src/com/google/gridworks/expr/functions/booleans/Or.java deleted file mode 100644 index 9934b4788..000000000 --- a/main/src/com/google/gridworks/expr/functions/booleans/Or.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.google.gridworks.expr.functions.booleans; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class Or implements Function { - - public Object call(Properties bindings, Object[] args) { - for (Object o : args) { - if (Not.objectToBoolean(o)) { - return true; - } - } - return false; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns a OR b"); - writer.key("params"); writer.value("boolean a, boolean b"); - writer.key("returns"); 
writer.value("boolean"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/date/DatePart.java b/main/src/com/google/gridworks/expr/functions/date/DatePart.java deleted file mode 100644 index edf61e53f..000000000 --- a/main/src/com/google/gridworks/expr/functions/date/DatePart.java +++ /dev/null @@ -1,68 +0,0 @@ -package com.google.gridworks.expr.functions.date; - -import java.util.Calendar; -import java.util.Date; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class DatePart implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2 && - args[0] != null && (args[0] instanceof Calendar || args[0] instanceof Date) && - args[1] != null && args[1] instanceof String) { - - String part = (String) args[1]; - if (args[0] instanceof Calendar) { - return getPart((Calendar) args[0], part); - } else { - Calendar c = Calendar.getInstance(); - c.setTime((Date) args[0]); - return getPart(c, part); - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a date, a number and a string"); - } - - static private String[] s_daysOfWeek = new String[] { - "Saturday", "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday" - }; - - private Object getPart(Calendar c, String part) { - if ("hours".equals(part) || "hour".equals(part) || "h".equals(part)) { - return c.get(Calendar.HOUR_OF_DAY); - } else if ("minutes".equals(part) || "minute".equals(part) || "min".equals(part)) { // avoid 'm' to avoid confusion with month - return c.get(Calendar.MINUTE); - } else if ("seconds".equals(part) || "sec".equals(part) || "s".equals(part)) { - return c.get(Calendar.SECOND); - } else if ("years".equals(part) || "year".equals(part)) { - return c.get(Calendar.YEAR); - } 
else if ("months".equals(part) || "month".equals(part)) { // avoid 'm' to avoid confusion with minute - return c.get(Calendar.MONTH) + 1; // ISSUE 115 - people expect January to be 1 not 0 - } else if ("weeks".equals(part) || "week".equals(part) || "w".equals(part)) { - return c.get(Calendar.WEEK_OF_MONTH); - } else if ("days".equals(part) || "day".equals(part) || "d".equals(part)) { - return c.get(Calendar.DAY_OF_MONTH); - } else if ("weekday".equals(part)) { - return s_daysOfWeek[c.get(Calendar.DAY_OF_WEEK)]; - } else if ("time".equals(part)) { - return c.getTimeInMillis(); - } else { - return new EvalError("Date unit '" + part + "' not recognized."); - } - } - - public void write(JSONWriter writer, Properties options) throws JSONException { - writer.object(); - writer.key("description"); writer.value("Returns part of a date"); - writer.key("params"); writer.value("date d, string part"); - writer.key("returns"); writer.value("date"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/date/Inc.java b/main/src/com/google/gridworks/expr/functions/date/Inc.java deleted file mode 100644 index ae7d826f2..000000000 --- a/main/src/com/google/gridworks/expr/functions/date/Inc.java +++ /dev/null @@ -1,58 +0,0 @@ -package com.google.gridworks.expr.functions.date; - -import java.util.Calendar; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Inc implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 3 && - args[0] != null && args[0] instanceof Calendar && - args[1] != null && args[1] instanceof Number && - args[2] != null && args[2] instanceof String) { - Calendar date = (Calendar) args[0]; - int amount = ((Number) args[1]).intValue(); - String unit = (String) args[2]; - - 
date.add(getField(unit), amount); - - return date; - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a date, a number and a string"); - } - - private int getField(String unit) { - if ("hours".equals(unit) || "hour".equals(unit) || "h".equals(unit)) { - return Calendar.HOUR; - } else if ("days".equals(unit) || "day".equals(unit) || "d".equals(unit)) { - return Calendar.DAY_OF_MONTH; - } else if ("years".equals(unit) || "year".equals(unit)) { - return Calendar.YEAR; - } else if ("months".equals(unit) || "month".equals(unit)) { // avoid 'm' to avoid confusion with minute - return Calendar.MONTH; - } else if ("minutes".equals(unit) || "minute".equals(unit) || "min".equals(unit)) { // avoid 'm' to avoid confusion with month - return Calendar.MINUTE; - } else if ("weeks".equals(unit) || "week".equals(unit) || "w".equals(unit)) { - return Calendar.WEEK_OF_MONTH; - } else if ("seconds".equals(unit) || "sec".equals(unit) || "s".equals(unit)) { - return Calendar.SECOND; - } else { - throw new RuntimeException("Unit '" + unit + "' not recognized."); - } - } - - public void write(JSONWriter writer, Properties options) throws JSONException { - writer.object(); - writer.key("description"); writer.value("Returns a date changed by the given amount in the given unit of time"); - writer.key("params"); writer.value("date d, number value, string unit (default to 'hour')"); - writer.key("returns"); writer.value("date"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/date/Now.java b/main/src/com/google/gridworks/expr/functions/date/Now.java deleted file mode 100644 index 4bd752c40..000000000 --- a/main/src/com/google/gridworks/expr/functions/date/Now.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.google.gridworks.expr.functions.date; - -import java.util.Calendar; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public 
class Now implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 0) { - return Calendar.getInstance(); - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the current time"); - writer.key("returns"); writer.value("date"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/math/Ceil.java b/main/src/com/google/gridworks/expr/functions/math/Ceil.java deleted file mode 100644 index 04d80e7ec..000000000 --- a/main/src/com/google/gridworks/expr/functions/math/Ceil.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.google.gridworks.expr.functions.math; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Ceil implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null && args[0] instanceof Number) { - return (long) Math.ceil(((Number) args[0]).doubleValue()); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a number"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the ceiling of a number"); - writer.key("params"); writer.value("number d"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/math/Exp.java b/main/src/com/google/gridworks/expr/functions/math/Exp.java deleted file mode 100644 index 186a268ca..000000000 --- a/main/src/com/google/gridworks/expr/functions/math/Exp.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.google.gridworks.expr.functions.math; - 
-import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class Exp implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] instanceof Number) { - return Math.exp(((Number) args[0]).doubleValue()); - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns e^n"); - writer.key("params"); writer.value("number n"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/math/Floor.java b/main/src/com/google/gridworks/expr/functions/math/Floor.java deleted file mode 100644 index 04aad9c3c..000000000 --- a/main/src/com/google/gridworks/expr/functions/math/Floor.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.google.gridworks.expr.functions.math; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Floor implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null && args[0] instanceof Number) { - return (long) Math.floor(((Number) args[0]).doubleValue()); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a number"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the floor of a number"); - writer.key("params"); writer.value("number d"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } - -} diff --git a/main/src/com/google/gridworks/expr/functions/math/Ln.java 
b/main/src/com/google/gridworks/expr/functions/math/Ln.java deleted file mode 100644 index e5e90f47c..000000000 --- a/main/src/com/google/gridworks/expr/functions/math/Ln.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.google.gridworks.expr.functions.math; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Ln implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null && args[0] instanceof Number) { - return Math.log(((Number) args[0]).doubleValue()); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a number"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the natural log of n"); - writer.key("params"); writer.value("number n"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/math/Log.java b/main/src/com/google/gridworks/expr/functions/math/Log.java deleted file mode 100644 index e827bffae..000000000 --- a/main/src/com/google/gridworks/expr/functions/math/Log.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.google.gridworks.expr.functions.math; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Log implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null && args[0] instanceof Number) { - return Math.log10(((Number) args[0]).doubleValue()); - } - return new 
EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a number"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the base 10 log of n"); - writer.key("params"); writer.value("number n"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/math/Max.java b/main/src/com/google/gridworks/expr/functions/math/Max.java deleted file mode 100644 index f9e64c70e..000000000 --- a/main/src/com/google/gridworks/expr/functions/math/Max.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.google.gridworks.expr.functions.math; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Max implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2 && - args[0] != null && args[0] instanceof Number && - args[1] != null && args[1] instanceof Number) { - return Math.max( - ((Number) args[0]).doubleValue(), - ((Number) args[1]).doubleValue()); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 numbers"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the greater of two numbers"); - writer.key("params"); writer.value("number a, number b"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/math/Min.java b/main/src/com/google/gridworks/expr/functions/math/Min.java deleted file mode 100644 index 9ea6a3105..000000000 --- a/main/src/com/google/gridworks/expr/functions/math/Min.java +++ /dev/null @@ -1,34 +0,0 @@ 
-package com.google.gridworks.expr.functions.math; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Min implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2 && - args[0] != null && args[0] instanceof Number && - args[1] != null && args[1] instanceof Number) { - return Math.min( - ((Number) args[0]).doubleValue(), - ((Number) args[1]).doubleValue()); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 numbers"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the smaller of two numbers"); - writer.key("params"); writer.value("number a, number b"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/math/Mod.java b/main/src/com/google/gridworks/expr/functions/math/Mod.java deleted file mode 100644 index d808fcbda..000000000 --- a/main/src/com/google/gridworks/expr/functions/math/Mod.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.google.gridworks.expr.functions.math; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Mod implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2 && - args[0] != null && args[0] instanceof Number && - args[1] != null && args[1] instanceof Number) { - int a = ((Number) args[0]).intValue(); - int b = ((Number) args[1]).intValue(); - - return a % b; - } - return new 
EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 numbers"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns a modulus b"); - writer.key("params"); writer.value("number a, number b"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/math/Pow.java b/main/src/com/google/gridworks/expr/functions/math/Pow.java deleted file mode 100644 index 16e87b923..000000000 --- a/main/src/com/google/gridworks/expr/functions/math/Pow.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.google.gridworks.expr.functions.math; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class Pow implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2 && args[0] instanceof Number && args[1] instanceof Number) { - return Math.pow( - ((Number) args[0]).doubleValue(), - ((Number) args[1]).doubleValue() - ); - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns a^b"); - writer.key("params"); writer.value("number a, number b"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/math/Round.java b/main/src/com/google/gridworks/expr/functions/math/Round.java deleted file mode 100644 index 2c906ca74..000000000 --- a/main/src/com/google/gridworks/expr/functions/math/Round.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.google.gridworks.expr.functions.math; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import 
com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Round implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null && args[0] instanceof Number) { - return ((Number) args[0]).longValue(); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a number"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns n rounded"); - writer.key("params"); writer.value("number n"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/math/Sum.java b/main/src/com/google/gridworks/expr/functions/math/Sum.java deleted file mode 100644 index 549f87932..000000000 --- a/main/src/com/google/gridworks/expr/functions/math/Sum.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.google.gridworks.expr.functions.math; - -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Sum implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1) { - Object v = args[0]; - - if (v != null && (v.getClass().isArray() || v instanceof List)) { - int length = v.getClass().isArray() ? 
- ((Object[]) v).length : - ExpressionUtils.toObjectList(v).size(); - - double total = 0; - - if (v.getClass().isArray()) { - Object[] a = (Object[]) v; - for (int i = 0; i < length; i++) { - Object n = a[length - i - 1]; - if (n instanceof Number) { - total += ((Number) n).doubleValue(); - } - } - } else { - List a = ExpressionUtils.toObjectList(v); - for (int i = 0; i < length; i++) { - Object n = a.get(length - i - 1); - if (n instanceof Number) { - total += ((Number) n).doubleValue(); - } - } - } - return total; - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array of numbers"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Sums numbers in array a"); - writer.key("params"); writer.value("array a"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Chomp.java b/main/src/com/google/gridworks/expr/functions/strings/Chomp.java deleted file mode 100644 index 2e5d04f36..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Chomp.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.apache.commons.lang.StringUtils; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class Chomp implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object o1 = args[0]; - Object o2 = args[1]; - if (o1 != null && o2 != null && o1 instanceof String && o2 instanceof String) { - return StringUtils.chomp((String) o1, (String) o2); - } - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Removes separator from the end 
of str if it's there, otherwise leave it alone."); - writer.key("params"); writer.value("string str, string separator"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Contains.java b/main/src/com/google/gridworks/expr/functions/strings/Contains.java deleted file mode 100644 index 56d1d6a6a..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Contains.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class Contains implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object s1 = args[0]; - Object s2 = args[1]; - if (s1 != null && s2 != null && s1 instanceof String && s2 instanceof String) { - return ((String) s1).indexOf((String) s2) > -1; - } - } - return null; - } - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns whether s contains frag"); - writer.key("params"); writer.value("string s, string frag"); - writer.key("returns"); writer.value("boolean"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Diff.java b/main/src/com/google/gridworks/expr/functions/strings/Diff.java deleted file mode 100644 index 58beb0b61..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Diff.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Calendar; -import java.util.Date; -import java.util.Properties; - -import org.apache.commons.lang.StringUtils; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.util.CalendarParser; -import 
com.google.gridworks.expr.util.CalendarParserException; -import com.google.gridworks.gel.Function; - -public class Diff implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length >= 2 && args.length <= 3) { - Object o1 = args[0]; - Object o2 = args[1]; - if (o1 != null && o2 != null) { - if (o1 instanceof String && o2 instanceof String) { - return StringUtils.difference((String) o1,(String) o2); - } else if ((o1 instanceof Date || o1 instanceof Calendar) && args.length == 3) { - Object o3 = args[2]; - if (o3 != null && o3 instanceof String) { - try { - String unit = ((String) o3).toLowerCase(); - Date c1 = (o1 instanceof Date) ? (Date) o1 : ((Calendar) o1).getTime(); - Date c2 = (o2 instanceof Date) ? (Date) o2 : CalendarParser.parse((o2 instanceof String) ? (String) o2 : o2.toString()).getTime(); - long delta = (c1.getTime() - c2.getTime()) / 1000; - if ("seconds".equals(unit)) return delta; - delta /= 60; - if ("minutes".equals(unit)) return delta; - delta /= 60; - if ("hours".equals(unit)) return delta; - long days = delta / 24; - if ("days".equals(unit)) return days; - if ("weeks".equals(unit)) return days / 7; - if ("months".equals(unit)) return days / 30; - if ("years".equals(unit)) return days / 365; - } catch (CalendarParserException e) { - // we should throw at this point because it's important to know that date parsing failed - } - } - } - } - } - return null; - } - - public void write(JSONWriter writer, Properties options) throws JSONException { - writer.object(); - writer.key("description"); writer.value("For strings, returns the portion where they differ. 
For dates, it returns the difference in given time units"); - writer.key("params"); writer.value("o1, o2, time unit (optional)"); - writer.key("returns"); writer.value("string for strings, number for dates"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/EndsWith.java b/main/src/com/google/gridworks/expr/functions/strings/EndsWith.java deleted file mode 100644 index 929ffd1d5..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/EndsWith.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class EndsWith implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object s1 = args[0]; - Object s2 = args[1]; - if (s1 != null && s2 != null && s1 instanceof String && s2 instanceof String) { - return ((String) s1).endsWith((String) s2); - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns whether s ends with sub"); - writer.key("params"); writer.value("string s, string sub"); - writer.key("returns"); writer.value("boolean"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Escape.java b/main/src/com/google/gridworks/expr/functions/strings/Escape.java deleted file mode 100644 index eb7b7cfd7..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Escape.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.io.UnsupportedEncodingException; -import 
java.net.URLEncoder; -import java.util.Properties; - -import org.apache.commons.lang.StringEscapeUtils; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Escape implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object o1 = args[0]; - Object o2 = args[1]; - if (o1 != null && o2 != null && o1 instanceof String && o2 instanceof String) { - String s = (String) o1; - String mode = ((String) o2).toLowerCase(); - if ("html".equals(mode)) { - return StringEscapeUtils.escapeHtml(s); - } else if ("xml".equals(mode)) { - return StringEscapeUtils.escapeXml(s); - } else if ("csv".equals(mode)) { - return StringEscapeUtils.escapeCsv(s); - } else if ("javascript".equals(mode)) { - return StringEscapeUtils.escapeJavaScript(s); - } else if ("url".equals(mode)) { - try { - return URLEncoder.encode(s,"UTF-8"); - } catch (UnsupportedEncodingException e) {} - } else { - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " does not recognize mode '" + mode + "'."); - } - } - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Escapes a string depending on the given escaping mode."); - writer.key("params"); writer.value("string s, string mode ['html','xml','csv','url','javascript']"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Fingerprint.java b/main/src/com/google/gridworks/expr/functions/strings/Fingerprint.java deleted file mode 100644 index 6f7239e8d..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Fingerprint.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.google.gridworks.expr.functions.strings; 
- -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.clustering.binning.FingerprintKeyer; -import com.google.gridworks.clustering.binning.Keyer; -import com.google.gridworks.gel.Function; - -public class Fingerprint implements Function { - - static Keyer fingerprint = new FingerprintKeyer(); - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null) { - Object o = args[0]; - String s = (o instanceof String) ? (String) o : o.toString(); - return fingerprint.key(s); - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the fingerprint of s, a derived string that aims to be a more canonical form of it (this is mostly useful for finding clusters of strings related to the same information)."); - writer.key("params"); writer.value("string s"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/IndexOf.java b/main/src/com/google/gridworks/expr/functions/strings/IndexOf.java deleted file mode 100644 index 3d2e21ade..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/IndexOf.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class IndexOf implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object s1 = args[0]; - Object s2 = args[1]; - if (s1 != null && s2 != null && s1 instanceof String && s2 instanceof String) { - return ((String) s1).indexOf((String) s2); - } - } - return new 
EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the index of sub first ocurring in s"); - writer.key("params"); writer.value("string s, string sub"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/LastIndexOf.java b/main/src/com/google/gridworks/expr/functions/strings/LastIndexOf.java deleted file mode 100644 index a0f1fe60d..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/LastIndexOf.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class LastIndexOf implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object s1 = args[0]; - Object s2 = args[1]; - if (s1 != null && s2 != null && s1 instanceof String && s2 instanceof String) { - return ((String) s1).lastIndexOf((String) s2); - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the index of sub last ocurring in s"); - writer.key("params"); writer.value("string s, string sub"); - writer.key("returns"); writer.value("number"); - writer.endObject(); - } - -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/MD5.java b/main/src/com/google/gridworks/expr/functions/strings/MD5.java deleted file mode 100644 index ffe4770a7..000000000 --- 
a/main/src/com/google/gridworks/expr/functions/strings/MD5.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.apache.commons.codec.digest.DigestUtils; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class MD5 implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null) { - Object o = args[0]; - String s = (o instanceof String) ? (String) o : o.toString(); - return DigestUtils.md5Hex(s); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the MD5 hash of s"); - writer.key("params"); writer.value("string s"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Match.java b/main/src/com/google/gridworks/expr/functions/strings/Match.java deleted file mode 100644 index 16c0f9e0d..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Match.java +++ /dev/null @@ -1,55 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Match implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object s = args[0]; - Object p = args[1]; - - if (s != null && p != null && (p 
instanceof String || p instanceof Pattern)) { - - Pattern pattern = (p instanceof String) ? Pattern.compile((String) p) : (Pattern) p; - - Matcher matcher = pattern.matcher(s.toString()); - - if (matcher.matches()) { - int count = matcher.groupCount(); - - String[] groups = new String[count]; - for (int i = 0; i < count; i++) { - groups[i] = matcher.group(i + 1); - } - - return groups; - } else { - return null; - } - } - - return null; - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string or a regexp"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns an array of the groups matching the given regular expression"); - writer.key("params"); writer.value("string or regexp"); - writer.key("returns"); writer.value("array of strings"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/NGram.java b/main/src/com/google/gridworks/expr/functions/strings/NGram.java deleted file mode 100644 index 5824e0df3..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/NGram.java +++ /dev/null @@ -1,56 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.apache.commons.lang.StringUtils; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class NGram implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object s = args[0]; - Object n = args[1]; - - if (s != null && s instanceof String && n != null && n instanceof Number) { - - String[] tokens = StringUtils.split((String) s); - - int count = ((Number) n).intValue(); - if (count >= tokens.length) { - return new String[] { (String) s }; - } - - int len = 
tokens.length - count + 1; - String[] ngrams = new String[len]; - for (int i = 0; i < len; i++) { - String[] ss = new String[count]; - for (int j = 0; j < count; j++) { - ss[j] = tokens[i + j]; - } - ngrams[i] = StringUtils.join(ss,' '); - } - - return ngrams; - } - - return null; - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string and a number"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns an array of the word ngrams of s"); - writer.key("params"); writer.value("string s, number n"); - writer.key("returns"); writer.value("array of strings"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/NGramFingerprint.java b/main/src/com/google/gridworks/expr/functions/strings/NGramFingerprint.java deleted file mode 100644 index 81ce5681d..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/NGramFingerprint.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; -import java.util.TreeSet; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.clustering.binning.Keyer; -import com.google.gridworks.clustering.binning.NGramFingerprintKeyer; -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class NGramFingerprint implements Function { - - static Keyer ngram_fingerprint = new NGramFingerprintKeyer(); - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 || args.length == 2) { - if (args[0] != null) { - int ngram_size = 1; - if (args.length == 2 && args[1] != null) { - ngram_size = (args[1] instanceof Number) ? 
((Number) args[1]).intValue() : Integer.parseInt(args[1].toString()); - } - Object o = args[0]; - String s = (o instanceof String) ? (String) o : o.toString(); - return ngram_fingerprint.key(s,ngram_size); - } - return null; - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects at least a string"); - } - - protected TreeSet ngram_split(String s, int size) { - TreeSet set = new TreeSet(); - char[] chars = s.toCharArray(); - for (int i = 0; i + size <= chars.length; i++) { - set.add(new String(chars,i,size)); - } - return set; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the n-gram fingerprint of s"); - writer.key("params"); writer.value("string s, number n"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/ParseJson.java b/main/src/com/google/gridworks/expr/functions/strings/ParseJson.java deleted file mode 100644 index 1baae3be1..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/ParseJson.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONTokener; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class ParseJson implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length >= 1) { - Object o1 = args[0]; - if (o1 != null) { - try { - return new JSONTokener(o1.toString()).nextValue(); - } catch (JSONException e) { - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed: " + e.getMessage()); - } - } - } - return null; - } - - - public void write(JSONWriter writer, Properties options) - 
throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Parses a string as JSON"); - writer.key("params"); writer.value("string s"); - writer.key("returns"); writer.value("JSON object"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Partition.java b/main/src/com/google/gridworks/expr/functions/strings/Partition.java deleted file mode 100644 index f7ded1b4e..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Partition.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class Partition implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length >= 2 && args.length <= 3) { - Object o1 = args[0]; - Object o2 = args[1]; - - boolean omitFragment = false; - if (args.length == 3) { - Object o3 = args[2]; - if (o3 instanceof Boolean) { - omitFragment = ((Boolean) o3).booleanValue(); - } - } - - if (o1 != null && o2 != null && o1 instanceof String) { - String s = (String) o1; - - int from = -1; - int to = -1; - - if (o2 instanceof String) { - String frag = (String) o2; - - from = s.indexOf(frag); - to = from + frag.length(); - } else if (o2 instanceof Pattern) { - Pattern pattern = (Pattern) o2; - Matcher matcher = pattern.matcher(s); - if (matcher.find()) { - from = matcher.start(); - to = matcher.end(); - } - } - - String[] output = omitFragment ? 
new String[2] : new String[3]; - if (from > -1) { - output[0] = s.substring(0, from); - if (omitFragment) { - output[1] = s.substring(to); - } else { - output[1] = s.substring(from, to); - output[2] = s.substring(to); - } - } else { - output[0] = s; - output[1] = ""; - if (!omitFragment) { - output[2] = ""; - } - } - return output; - } - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value( - "Returns an array of strings [a,frag,b] where a is the string part before the first occurrence of frag in s and b is what's left. If omitFragment is true, frag is not returned."); - writer.key("params"); writer.value("string s, string or regex frag, optional boolean omitFragment"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Phonetic.java b/main/src/com/google/gridworks/expr/functions/strings/Phonetic.java deleted file mode 100644 index e49da14ae..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Phonetic.java +++ /dev/null @@ -1,51 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.clustering.binning.DoubleMetaphoneKeyer; -import com.google.gridworks.clustering.binning.MetaphoneKeyer; -import com.google.gridworks.clustering.binning.SoundexKeyer; -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Phonetic implements Function { - - static private DoubleMetaphoneKeyer metaphone2 = new DoubleMetaphoneKeyer(); - static private MetaphoneKeyer metaphone = new MetaphoneKeyer(); - static private SoundexKeyer soundex = new SoundexKeyer(); - - public Object call(Properties bindings, Object[] args) { - if 
(args.length == 2) { - Object o1 = args[0]; - Object o2 = args[1]; - if (o1 != null && o2 != null && o2 instanceof String) { - String str = (o1 instanceof String) ? (String) o1 : o1.toString(); - String encoding = ((String) o2).toLowerCase(); - if ("doublemetaphone".equals(encoding)) { - return metaphone2.key(str); - } else if ("metaphone".equals(encoding)) { - return metaphone.key(str); - } else if ("soundex".equals(encoding)) { - return soundex.key(str); - } else { - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " doesn't know how to handle the '" + encoding + "' encoding."); - } - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 3 strings"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the a phonetic encoding of s (optionally indicating which encoding to use')"); - writer.key("params"); writer.value("string s, string encoding (optional, defaults to 'DoubleMetaphone')"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/RPartition.java b/main/src/com/google/gridworks/expr/functions/strings/RPartition.java deleted file mode 100644 index ef4797cf3..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/RPartition.java +++ /dev/null @@ -1,80 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class RPartition implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length >= 2 && args.length <= 3) { - Object o1 = args[0]; - Object o2 = args[1]; - - boolean omitFragment = false; - if (args.length == 3) { - Object o3 = args[2]; - 
if (o3 instanceof Boolean) { - omitFragment = ((Boolean) o3).booleanValue(); - } - } - - if (o1 != null && o2 != null && o1 instanceof String) { - String s = (String) o1; - - int from = -1; - int to = -1; - - if (o2 instanceof String) { - String frag = (String) o2; - - from = s.lastIndexOf(frag); - to = from + frag.length(); - } else if (o2 instanceof Pattern) { - Pattern pattern = (Pattern) o2; - Matcher matcher = pattern.matcher(s); - - while (matcher.find()) { - from = matcher.start(); - to = matcher.end(); - } - } - - String[] output = omitFragment ? new String[2] : new String[3]; - if (from > -1) { - output[0] = s.substring(0, from); - if (omitFragment) { - output[1] = s.substring(to); - } else { - output[1] = s.substring(from, to); - output[2] = s.substring(to); - } - } else { - output[0] = s; - output[1] = ""; - if (!omitFragment) { - output[2] = ""; - } - } - return output; - } - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value( - "Returns an array of strings [a,frag,b] where a is the string part before the last occurrence of frag in s and b is what's left. 
If omitFragment is true, frag is not returned."); - writer.key("params"); writer.value("string s, string or regex frag, optional boolean omitFragment"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Reinterpret.java b/main/src/com/google/gridworks/expr/functions/strings/Reinterpret.java deleted file mode 100644 index b73c37131..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Reinterpret.java +++ /dev/null @@ -1,51 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.io.UnsupportedEncodingException; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; -import com.google.gridworks.model.Project; - -public class Reinterpret implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object o1 = args[0]; - Object o2 = args[1]; - if (o1 != null && o2 != null && o2 instanceof String) { - String str = (o1 instanceof String) ? 
(String) o1 : o1.toString(); - Project project = (Project) bindings.get("project"); - ProjectMetadata metadata = ProjectManager.singleton.getProjectMetadata(project.id); - String decoder = (String) metadata.getEncoding(); - String encoder = (String) o2; - String reinterpreted = null; - - try { - reinterpreted = new String(str.getBytes(decoder), encoder); - } catch (UnsupportedEncodingException e) { - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + ": encoding '" + encoder + "' is not available or recognized."); - } - - return reinterpreted; - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 arguments"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns s reinterpreted thru the given encoder."); - writer.key("params"); writer.value("string s, string encoder"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Replace.java b/main/src/com/google/gridworks/expr/functions/strings/Replace.java deleted file mode 100644 index 1d6926626..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Replace.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; -import java.util.regex.Pattern; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Replace implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 3) { - Object o1 = args[0]; - Object o2 = args[1]; - Object o3 = args[2]; - if (o1 != null && o2 != null && o3 != null && o3 instanceof String) { - String str = (o1 instanceof String) ? 
(String) o1 : o1.toString(); - - if (o2 instanceof String) { - return str.replace((String) o2, (String) o3); - } else if (o2 instanceof Pattern) { - Pattern pattern = (Pattern) o2; - return pattern.matcher(str).replaceAll((String) o3); - } - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 3 strings, or 1 string, 1 regex, and 1 string"); - } - - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the string obtained by replacing f with r in s"); - writer.key("params"); writer.value("string s, string or regex f, string r"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/ReplaceChars.java b/main/src/com/google/gridworks/expr/functions/strings/ReplaceChars.java deleted file mode 100644 index 6f97739fc..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/ReplaceChars.java +++ /dev/null @@ -1,38 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.apache.commons.lang.StringUtils; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class ReplaceChars implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 3) { - Object o1 = args[0]; - Object o2 = args[1]; - Object o3 = args[2]; - if (o1 != null && o2 != null && o3 != null && o2 instanceof String && o3 instanceof String) { - String str = (o1 instanceof String) ? 
(String) o1 : o1.toString(); - return StringUtils.replaceChars(str, (String) o2, (String) o3); - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 3 strings"); - } - - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the string obtained by replacing all chars in f with the char in s at that same position"); - writer.key("params"); writer.value("string s, string f, string r"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/SHA1.java b/main/src/com/google/gridworks/expr/functions/strings/SHA1.java deleted file mode 100644 index 1fd21a3f2..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/SHA1.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.apache.commons.codec.digest.DigestUtils; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class SHA1 implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null) { - Object o = args[0]; - String s = (o instanceof String) ? 
(String) o : o.toString(); - return DigestUtils.shaHex(s); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the SHA-1 hash of s"); - writer.key("params"); writer.value("string s"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/SmartSplit.java b/main/src/com/google/gridworks/expr/functions/strings/SmartSplit.java deleted file mode 100644 index 9ffede25c..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/SmartSplit.java +++ /dev/null @@ -1,78 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.io.IOException; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import au.com.bytecode.opencsv.CSVParser; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class SmartSplit implements Function { - static protected CSVParser s_tabParser = new CSVParser( - '\t', - CSVParser.DEFAULT_QUOTE_CHARACTER, - CSVParser.DEFAULT_ESCAPE_CHARACTER, - CSVParser.DEFAULT_STRICT_QUOTES, - CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, - false - ); - static protected CSVParser s_commaParser = new CSVParser( - ',', - CSVParser.DEFAULT_QUOTE_CHARACTER, - CSVParser.DEFAULT_ESCAPE_CHARACTER, - CSVParser.DEFAULT_STRICT_QUOTES, - CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, - false - ); - public Object call(Properties bindings, Object[] args) { - if (args.length >= 1 && args.length <= 2) { - CSVParser parser = null; - - Object v = args[0]; - String s = v.toString(); - - if (args.length > 1) { - String sep = args[1].toString(); - parser = new CSVParser( - sep.charAt(0), - CSVParser.DEFAULT_QUOTE_CHARACTER, - 
CSVParser.DEFAULT_ESCAPE_CHARACTER, - CSVParser.DEFAULT_STRICT_QUOTES, - CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, - false - ); - } - - if (parser == null) { - int tab = s.indexOf('\t'); - if (tab >= 0) { - parser = s_tabParser; - } else { - parser = s_commaParser; - } - } - - try { - return parser.parseLine(s); - } catch (IOException e) { - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " error: " + e.getMessage()); - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 1 or 2 strings"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the array of strings obtained by splitting s with separator sep. Handles quotes properly. Guesses tab or comma separator if \"sep\" is not given."); - writer.key("params"); writer.value("string s, optional string sep"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Split.java b/main/src/com/google/gridworks/expr/functions/strings/Split.java deleted file mode 100644 index f05215e36..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Split.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; -import java.util.regex.Pattern; - -import org.apache.commons.lang.StringUtils; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Split implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length >= 2 && args.length <= 3) { - boolean preserveAllTokens = false; - - Object v = args[0]; - Object split = args[1]; - if (args.length == 3) { - Object preserve = args[2]; - if (preserve 
instanceof Boolean) { - preserveAllTokens = ((Boolean) preserve); - } - } - - if (v != null && split != null) { - String str = (v instanceof String ? (String) v : v.toString()); - if (split instanceof String) { - return preserveAllTokens ? - StringUtils.splitByWholeSeparatorPreserveAllTokens(str, (String) split) : - StringUtils.splitByWholeSeparator(str, (String) split); - } else if (split instanceof Pattern) { - Pattern pattern = (Pattern) split; - return pattern.split(str); - } - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings, or 1 string and 1 regex, followed by an optional boolean"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the array of strings obtained by splitting s with separator sep. If preserveAllTokens is true, then empty segments are preserved."); - writer.key("params"); writer.value("string s, string or regex sep, optional boolean preserveAllTokens"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/SplitByCharType.java b/main/src/com/google/gridworks/expr/functions/strings/SplitByCharType.java deleted file mode 100644 index 5ce81a49d..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/SplitByCharType.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.apache.commons.lang.StringUtils; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class SplitByCharType implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1) { - Object o = args[0]; - if (o != null) { - String s = (o 
instanceof String) ? (String) o : o.toString(); - return StringUtils.splitByCharacterType(s); - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns an array of strings obtained by splitting s grouping consecutive chars by their unicode type"); - writer.key("params"); writer.value("string s"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/SplitByLengths.java b/main/src/com/google/gridworks/expr/functions/strings/SplitByLengths.java deleted file mode 100644 index 02fbaffbd..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/SplitByLengths.java +++ /dev/null @@ -1,48 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class SplitByLengths implements Function { - public Object call(Properties bindings, Object[] args) { - if (args.length >= 2 && args[0] != null) { - Object o = args[0]; - String s = o instanceof String ? 
(String) o : o.toString(); - - String[] results = new String[args.length - 1]; - - int lastIndex = 0; - - for (int i = 1; i < args.length; i++) { - int thisIndex = lastIndex; - - Object o2 = args[i]; - if (o2 instanceof Number) { - thisIndex = Math.min(s.length(), lastIndex + Math.max(0, ((Number) o2).intValue())); - } - - results[i - 1] = s.substring(lastIndex, thisIndex); - lastIndex = thisIndex; - } - - return results; - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 1 string and 1 or more numbers"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns the array of strings obtained by splitting s into substrings with the given lengths"); - writer.key("params"); writer.value("string s, number n, ..."); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/StartsWith.java b/main/src/com/google/gridworks/expr/functions/strings/StartsWith.java deleted file mode 100644 index 7ed35fe90..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/StartsWith.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class StartsWith implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object s1 = args[0]; - Object s2 = args[1]; - if (s1 != null && s2 != null && s1 instanceof String && s2 instanceof String) { - return ((String) s1).startsWith((String) s2); - } - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings"); - } - public void write(JSONWriter 
writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns whether s starts with sub"); - writer.key("params"); writer.value("string s, string sub"); - writer.key("returns"); writer.value("boolean"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/ToLowercase.java b/main/src/com/google/gridworks/expr/functions/strings/ToLowercase.java deleted file mode 100644 index e3fd98e7f..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/ToLowercase.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class ToLowercase implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null) { - Object o = args[0]; - return (o instanceof String ? 
(String) o : o.toString()).toLowerCase(); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns s converted to lowercase"); - writer.key("params"); writer.value("string s"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/ToTitlecase.java b/main/src/com/google/gridworks/expr/functions/strings/ToTitlecase.java deleted file mode 100644 index d2c4ed7ed..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/ToTitlecase.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.apache.commons.lang.WordUtils; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class ToTitlecase implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null) { - Object o = args[0]; - String s = o instanceof String ? 
(String) o : o.toString(); - - return WordUtils.capitalizeFully(s); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns s converted to titlecase"); - writer.key("params"); writer.value("string s"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } - -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/ToUppercase.java b/main/src/com/google/gridworks/expr/functions/strings/ToUppercase.java deleted file mode 100644 index 51be55a29..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/ToUppercase.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class ToUppercase implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null) { - Object o = args[0]; - return (o instanceof String ? 
(String) o : o.toString()).toUpperCase(); - } - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns s converted to uppercase"); - writer.key("params"); writer.value("string s"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Trim.java b/main/src/com/google/gridworks/expr/functions/strings/Trim.java deleted file mode 100644 index f394e886d..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Trim.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class Trim implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1) { - Object s1 = args[0]; - if (s1 != null && s1 instanceof String) { - return ((String) s1).trim(); - } - } - return null; - } - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns copy of the string, with leading and trailing whitespace omitted."); - writer.key("params"); writer.value("string s"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Unescape.java b/main/src/com/google/gridworks/expr/functions/strings/Unescape.java deleted file mode 100644 index e190a8d5b..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Unescape.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; -import 
java.util.Properties; - -import org.apache.commons.lang.StringEscapeUtils; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.Function; - -public class Unescape implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 2) { - Object o1 = args[0]; - Object o2 = args[1]; - if (o1 != null && o2 != null && o1 instanceof String && o2 instanceof String) { - String s = (String) o1; - String mode = ((String) o2).toLowerCase(); - if ("html".equals(mode)) { - return StringEscapeUtils.unescapeHtml(s); - } else if ("xml".equals(mode)) { - return StringEscapeUtils.unescapeXml(s); - } else if ("csv".equals(mode)) { - return StringEscapeUtils.unescapeCsv(s); - } else if ("javascript".equals(mode)) { - return StringEscapeUtils.unescapeJavaScript(s); - } else if ("url".equals(mode)) { - try { - return URLDecoder.decode(s,"UTF-8"); - } catch (UnsupportedEncodingException e) {} - } else { - return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " does not recognize mode '" + mode + "'."); - } - } - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Unescapes all escaped parts of the string depending on the given escaping mode."); - writer.key("params"); writer.value("string s, string mode ['html','xml','csv','url','javascript']"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/Unicode.java b/main/src/com/google/gridworks/expr/functions/strings/Unicode.java deleted file mode 100644 index 40082b38d..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/Unicode.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - 
-import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class Unicode implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null) { - Object o = args[0]; - String s = (o instanceof String) ? (String) o : o.toString(); - Integer[] output = new Integer[s.length()]; - for (int i = 0; i < s.length(); i++) { - output[i] = s.codePointAt(i); - } - return output; - } - return null; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns an array of strings describing each character of s in their full unicode notation"); - writer.key("params"); writer.value("string s"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/functions/strings/UnicodeType.java b/main/src/com/google/gridworks/expr/functions/strings/UnicodeType.java deleted file mode 100644 index 3165118a5..000000000 --- a/main/src/com/google/gridworks/expr/functions/strings/UnicodeType.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.google.gridworks.expr.functions.strings; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.gel.Function; - -public class UnicodeType implements Function { - - public Object call(Properties bindings, Object[] args) { - if (args.length == 1 && args[0] != null) { - Object o = args[0]; - String s = (o instanceof String) ? 
(String) o : o.toString(); - String[] output = new String[s.length()]; - for (int i = 0; i < s.length(); i++) { - output[i] = translateType(Character.getType(s.codePointAt(i))); - } - return output; - } - return null; - } - - private String translateType(int type) { - switch(type) { - case 0: return "unassigned"; - case 1: return "uppercase letter"; - case 2: return "lowercase letter"; - case 3: return "titlecase letter"; - case 4: return "modifier letter"; - case 5: return "other letter"; - case 6: return "non spacing mark"; - case 7: return "enclosing mark"; - case 8: return "combining spacing mark"; - case 9: return "decimal digit number"; - case 10: return "letter number"; - case 11: return "other number"; - case 12: return "space separator"; - case 13: return "line separator"; - case 14: return "paragraph separator"; - case 15: return "control"; - case 16: return "format"; - // 17 does not seem to be used - case 18: return "private use"; - case 19: return "surrogate"; - case 20: return "dash punctuation"; - case 21: return "start punctuation"; - case 22: return "end punctuation"; - case 23: return "connector punctuation"; - case 24: return "other punctuation"; - case 25: return "math symbol"; - case 26: return "currency symbol"; - case 27: return "modifier symbol"; - case 28: return "other symbol"; - case 29: return "initial quote punctuation"; - case 30: return "final quote punctuation"; - default: return "unknown"; - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value("Returns an array of strings describing each character of s in their full unicode notation"); - writer.key("params"); writer.value("string s"); - writer.key("returns"); writer.value("string"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/expr/util/CalendarParser.java b/main/src/com/google/gridworks/expr/util/CalendarParser.java deleted file mode 100644 index 
69f0646a2..000000000 --- a/main/src/com/google/gridworks/expr/util/CalendarParser.java +++ /dev/null @@ -1,1941 +0,0 @@ -package com.google.gridworks.expr.util; - -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.Calendar; -import java.util.GregorianCalendar; -import java.util.TimeZone; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -// Taken from http://icecube.wisc.edu/~dglo/software/calparse/index.html -// Copyright Dave Glowacki. Released under the BSD license. - -/** - * Date parser state. - */ -class ParserState { - - /** bit indicating that the year comes before the month. */ - static final int YEAR_BEFORE_MONTH = 0x4; - /** bit indicating that the year comes before the day. */ - static final int YEAR_BEFORE_DAY = 0x2; - /** bit indicating that the month comes before the day. */ - static final int MONTH_BEFORE_DAY = 0x1; - - /** bit indicating that the year comes after the month. */ - static final int YEAR_AFTER_MONTH = 0x0; - /** bit indicating that the year comes after the day. */ - static final int YEAR_AFTER_DAY = 0x0; - /** bit indicating that the month comes after the day. */ - static final int MONTH_AFTER_DAY = 0x0; - - /** value indicating an unset variable. */ - static final int UNSET = Integer.MIN_VALUE; - - /** true if year should appear before month. */ - private boolean yearBeforeMonth; - /** true if year should appear before day. */ - private boolean yearBeforeDay; - /** true if month should appear before day. */ - private boolean monthBeforeDay; - - /** year. */ - private int year; - /** month (0-11). */ - private int month; - /** day of month. */ - private int day; - /** hour (0-23). */ - private int hour; - /** minute (0-59). */ - private int minute; - /** second (0-59). */ - private int second; - /** millisecond (0-999). */ - private int milli; - - /** true if time is after noon. 
*/ - private boolean timePostMeridian; - - /** time zone (use default time zone if this is null). */ - private TimeZone timeZone; - - /** - * Create parser state for the specified order. - * - * @param order - * YY_MM_DD, MM_DD_YY, etc. - */ - ParserState(int order) { - yearBeforeMonth = (order & YEAR_BEFORE_MONTH) == YEAR_BEFORE_MONTH; - yearBeforeDay = (order & YEAR_BEFORE_DAY) == YEAR_BEFORE_DAY; - monthBeforeDay = (order & MONTH_BEFORE_DAY) == MONTH_BEFORE_DAY; - - year = UNSET; - month = UNSET; - day = UNSET; - hour = UNSET; - minute = UNSET; - second = UNSET; - timePostMeridian = false; - } - - /** - * Get day of month. - * - * @return day of month - */ - int getDate() { - return day; - } - - /** - * Get hour. - * - * @return hour - */ - int getHour() { - return hour; - } - - /** - * Get millisecond. - * - * @return millisecond - */ - int getMillisecond() { - return milli; - } - - /** - * Get minute. - * - * @return minute - */ - int getMinute() { - return minute; - } - - /** - * Get month. - * - * @return month - */ - int getMonth() { - return month; - } - - /** - * Get second. - * - * @return second - */ - int getSecond() { - return second; - } - - /** - * Get time zone. - * - * @return time zone (null if none was specified) - */ - TimeZone getTimeZone() { - return timeZone; - } - - /** - * Get year. - * - * @return year - */ - int getYear() { - return year; - } - - /** - * Is day of month value set? - * - * @return true if a value has been assigned - */ - boolean isDateSet() { - return (day != UNSET); - } - - /** - * Is hour value set? - * - * @return true if a value has been assigned - */ - boolean isHourSet() { - return (hour != UNSET); - } - - /** - * Is millisecond value set? - * - * @return true if a value has been assigned - */ - boolean isMillisecondSet() { - return (milli != UNSET); - } - - /** - * Is minute value set? 
- * - * @return true if a value has been assigned - */ - boolean isMinuteSet() { - return (minute != UNSET); - } - - /** - * Is a numeric month placed before a numeric day of month? - * - * @return true if month is before day of month - */ - boolean isMonthBeforeDay() { - return monthBeforeDay; - } - - /** - * Is month value set? - * - * @return true if a value has been assigned - */ - boolean isMonthSet() { - return (month != UNSET); - } - - /** - * Is second value set? - * - * @return true if a value has been assigned - */ - boolean isSecondSet() { - return (second != UNSET); - } - - /** - * Is the time post-meridian (i.e. afternoon)? - * - * @return true if time is P.M. - */ - boolean isTimePostMeridian() { - return (timePostMeridian || hour > 12); - } - - /** - * Is a numeric year placed before a numeric day of month? - * - * @return true if year is before day of month - */ - boolean isYearBeforeDay() { - return yearBeforeDay; - } - - /** - * Is a numeric year placed before a numeric month? - * - * @return true if year is before month - */ - boolean isYearBeforeMonth() { - return yearBeforeMonth; - } - - /** - * Is year value set? - * - * @return true if a value has been assigned - */ - boolean isYearSet() { - return (year != UNSET); - } - - /** - * Fill the calendar with the parsed date. 
- * - * @param cal - * calendar to fill - * @param ignoreChanges - * if true, throw an exception when a date like - * Sept 31 is changed to Oct 1 - * - * @throws CalendarParserException - * if the date cannot be set for some reason - */ - void setCalendar(GregorianCalendar cal, boolean ignoreChanges) - throws CalendarParserException { - cal.clear(); - if (year != UNSET && month != UNSET && day != UNSET) { - cal.set(Calendar.YEAR, year); - cal.set(Calendar.MONTH, month - 1); - cal.set(Calendar.DATE, day); - - if (!ignoreChanges) { - final int calYear = cal.get(Calendar.YEAR); - final int calMonth = cal.get(Calendar.MONTH); - final int calDay = cal.get(Calendar.DATE); - - if (calYear != year || (calMonth + 1) != month || calDay != day) { - throw new CalendarParserException("Date was set to " - + calYear + "/" + (calMonth + 1) + "/" + calDay - + " not requested " + year + "/" + month + "/" - + day); - } - } - } - - cal.clear(Calendar.HOUR); - cal.clear(Calendar.MINUTE); - cal.clear(Calendar.SECOND); - cal.clear(Calendar.MILLISECOND); - - if (hour != UNSET && minute != UNSET) { - cal.set(Calendar.HOUR, hour); - cal.set(Calendar.MINUTE, minute); - if (second != UNSET) { - cal.set(Calendar.SECOND, second); - if (milli != UNSET) { - cal.set(Calendar.MILLISECOND, milli); - } - } - - if (timeZone != null) { - cal.setTimeZone(timeZone); - } - } - } - - /** - * Set the day of month value. - * - * @param val - * day of month value - * - * @throws CalendarParserException - * if the value is not a valid day of month - */ - void setDate(int val) throws CalendarParserException { - if (val < 1 || val > 31) { - throw new CalendarParserException("Bad day " + val); - } - - day = val; - } - - /** - * Set the hour value. 
- * - * @param val - * hour value - * - * @throws CalendarParserException - * if the value is not a valid hour - */ - void setHour(int val) throws CalendarParserException { - final int tmpHour; - if (timePostMeridian) { - tmpHour = val + 12; - timePostMeridian = false; - } else { - tmpHour = val; - } - - if (tmpHour < 0 || tmpHour > 23) { - throw new CalendarParserException("Bad hour " + val); - } - - hour = tmpHour; - } - - /** - * Set the millisecond value. - * - * @param val - * millisecond value - * - * @throws CalendarParserException - * if the value is not a valid millisecond - */ - void setMillisecond(int val) throws CalendarParserException { - if (val < 0 || val > 999) { - throw new CalendarParserException("Bad millisecond " + val); - } - - milli = val; - } - - /** - * Set the minute value. - * - * @param val - * minute value - * - * @throws CalendarParserException - * if the value is not a valid minute - */ - void setMinute(int val) throws CalendarParserException { - if (val < 0 || val > 59) { - throw new CalendarParserException("Bad minute " + val); - } - - minute = val; - } - - /** - * Set the month value. - * - * @param val - * month value - * - * @throws CalendarParserException - * if the value is not a valid month - */ - void setMonth(int val) throws CalendarParserException { - if (val < 1 || val > 12) { - throw new CalendarParserException("Bad month " + val); - } - - month = val; - } - - /** - * Set the second value. - * - * @param val - * second value - * - * @throws CalendarParserException - * if the value is not a valid second - */ - void setSecond(int val) throws CalendarParserException { - if (val < 0 || val > 59) { - throw new CalendarParserException("Bad second " + val); - } - - second = val; - } - - /** - * Set the AM/PM indicator value. - * - * @param val - * true if time represented is after noon - */ - void setTimePostMeridian(boolean val) { - timePostMeridian = val; - } - - /** - * Set the time zone. 
- * - * @param tz - * time zone - */ - void setTimeZone(TimeZone tz) { - timeZone = tz; - } - - /** - * Set the year value. - * - * @param val - * year value - * - * @throws CalendarParserException - * if the value is not a valid year - */ - void setYear(int val) throws CalendarParserException { - if (val < 0) { - throw new CalendarParserException("Bad year " + val); - } - - year = val; - } -} - -/** - * A parser for arbitrary date/time strings. - */ -public class CalendarParser { - /** bit indicating that the year comes before the month. */ - public static final int YEAR_BEFORE_MONTH = ParserState.YEAR_BEFORE_MONTH; - /** bit indicating that the year comes before the day. */ - public static final int YEAR_BEFORE_DAY = ParserState.YEAR_BEFORE_DAY; - /** bit indicating that the month comes before the day. */ - public static final int MONTH_BEFORE_DAY = ParserState.MONTH_BEFORE_DAY; - - /** bit indicating that the year comes after the month. */ - public static final int YEAR_AFTER_MONTH = ParserState.YEAR_AFTER_MONTH; - /** bit indicating that the year comes after the day. */ - public static final int YEAR_AFTER_DAY = ParserState.YEAR_AFTER_DAY; - /** bit indicating that the month comes after the day. */ - public static final int MONTH_AFTER_DAY = ParserState.MONTH_AFTER_DAY; - - /** day/month/year order. */ - public static final int DD_MM_YY = YEAR_AFTER_MONTH | YEAR_AFTER_DAY - | MONTH_AFTER_DAY; - /** month/day/year order. */ - public static final int MM_DD_YY = YEAR_AFTER_MONTH | YEAR_AFTER_DAY - | MONTH_BEFORE_DAY; - /** month/year/day order. */ - public static final int MM_YY_DD = YEAR_AFTER_MONTH | YEAR_BEFORE_DAY - | MONTH_BEFORE_DAY; - /** day/year/month order. */ - public static final int DD_YY_MM = YEAR_BEFORE_MONTH | YEAR_AFTER_DAY - | MONTH_AFTER_DAY; - /** year/day/month order. */ - public static final int YY_DD_MM = YEAR_BEFORE_MONTH | YEAR_BEFORE_DAY - | MONTH_AFTER_DAY; - /** year/month/day order. 
*/ - public static final int YY_MM_DD = YEAR_BEFORE_MONTH | YEAR_BEFORE_DAY - | MONTH_BEFORE_DAY; - - /** list of time zone names. */ - private static final String[] zoneNames = loadTimeZoneNames(); - - /** Unknown place in time parsing. */ - private static final int PLACE_UNKNOWN = 0; - /** Parsing hour value from time string. */ - private static final int PLACE_HOUR = 1; - /** Parsing minute value from time string. */ - private static final int PLACE_MINUTE = 2; - /** Parsing second value from time string. */ - private static final int PLACE_SECOND = 3; - /** Parsing millisecond value from time string. */ - private static final int PLACE_MILLI = 4; - - /** Adjustment for two-digit years will break in 2050. */ - private static final int CENTURY_OFFSET = 2000; - - /** value indicating an unset variable. */ - private static final int UNSET = ParserState.UNSET; - - /** set to true to enable debugging. */ - private static final boolean DEBUG = false; - - /** list of weekday names. */ - private static final String[] WEEKDAY_NAMES = { "sunday", "monday", - "tuesday", "wednesday", "thursday", "friday", "saturday", }; - - /** list of month abbreviations and names. */ - private static final String[][] MONTHS = { { "jan", "January" }, - { "feb", "February" }, { "mar", "March" }, { "apr", "April" }, - { "may", "May" }, { "jun", "June" }, { "jul", "July" }, - { "aug", "August" }, { "sep", "September" }, { "oct", "October" }, - { "nov", "November" }, { "dec", "December" }, }; - - /** - * Append formatted time string to the string buffer. 
- * - * @param buf - * string buffer - * @param cal - * object containing time - * @param needSpace - * true if a space character should be inserted before - * any data - */ - private static final void appendTimeString(StringBuffer buf, Calendar cal, boolean needSpace) { - final int hour = cal.get(Calendar.HOUR_OF_DAY); - final int minute = cal.get(Calendar.MINUTE); - final int second = cal.get(Calendar.SECOND); - final int milli = cal.get(Calendar.MILLISECOND); - - if (hour != 0 || minute != 0 || second != 0 || milli != 0) { - if (needSpace) { - buf.append(' '); - } - if (hour < 10) { - buf.append(' '); - } - buf.append(hour); - - if (minute < 10) { - buf.append(":0"); - } else { - buf.append(':'); - } - buf.append(minute); - - if (second != 0 || milli != 0) { - if (second < 10) { - buf.append(":0"); - } else { - buf.append(':'); - } - buf.append(second); - - if (milli != 0) { - if (milli < 10) { - buf.append(".00"); - } else if (milli < 100) { - buf.append(".0"); - } else { - buf.append('.'); - } - buf.append(milli); - } - } - } - - TimeZone tz = cal.getTimeZone(); - if (tz.getRawOffset() == 0) { - buf.append(" GMT"); - } else { - buf.append(' '); - - int offset = tz.getRawOffset() / (60 * 1000); - if (offset < 0) { - buf.append('-'); - offset = -offset; - } else { - buf.append('+'); - } - - int hrOff = offset / 60; - if (hrOff < 10) { - buf.append('0'); - } - buf.append(hrOff); - buf.append(':'); - - int minOff = offset % 60; - if (minOff < 10) { - buf.append('0'); - } - buf.append(minOff); - } - } - - /** - * Return a string representation of the order value. - * - * @param order - * order - * - * @return order string - */ - public static final String getOrderString(int order) { - switch (order) { - case DD_MM_YY: - return "DD_MM_YY"; - case MM_DD_YY: - return "MM_DD_YY"; - case MM_YY_DD: - return "MM_YY_DD"; - case DD_YY_MM: - return "DD_YY_MM"; - case YY_DD_MM: - return "YY_DD_MM"; - case YY_MM_DD: - return "YY_MM_DD"; - default: - break; - } - - return "??" 
+ order + "??"; - } - - /** - * Translate a string representation of an ordinal number to the appropriate - * numeric value.
- * For example, "1st" would return 1, "23rd" - * would return 23, etc. - * - * @param str - * ordinal string - * - * @return the numeric value of the ordinal number, or - * CalendarParser.UNSET if the supplied string is not a - * valid ordinal number. - */ - private static final int getOrdinalNumber(String str) { - final int len = (str == null ? 0 : str.length()); - if (len >= 3) { - - String suffix = str.substring(len - 2); - if (suffix.equalsIgnoreCase("st") || suffix.equalsIgnoreCase("nd") - || suffix.equalsIgnoreCase("rd") - || suffix.equalsIgnoreCase("th")) { - try { - return Integer.parseInt(str.substring(0, len - 2)); - } catch (NumberFormatException nfe) { - // fall through if number was not parsed - } - } - } - - return UNSET; - } - - /** - * Get name of current place in time. - * - * @param place - * place ID - * - * @return place name ("hour", "minute", etc. - */ - private static final String getTimePlaceString(int place) { - switch (place) { - case PLACE_HOUR: - return "hour"; - case PLACE_MINUTE: - return "minute"; - case PLACE_SECOND: - return "second"; - case PLACE_MILLI: - return "millisecond"; - default: - break; - } - - return "unknown"; - } - - /** - * Determine is the supplied string is a value weekday name. - * - * @param str - * weekday name to check - * - * @return true if the supplied string is a weekday name. - */ - private static final boolean isWeekdayName(String str) { - if (str == null || str.length() < 3) { - return false; - } - - String lstr = str.toLowerCase(); - for (int i = 0; i < WEEKDAY_NAMES.length; i++) { - if (lstr.startsWith(WEEKDAY_NAMES[i]) - || WEEKDAY_NAMES[i].toLowerCase().startsWith(lstr)) { - return true; - } - } - - return false; - } - - /** - * Load list of time zones if sun.util.calendar.ZoneInfo exists. - * - * @return null if time zone list cannot be loaded. 
- */ - private static final String[] loadTimeZoneNames() { - Class zoneInfo; - try { - zoneInfo = Class.forName("sun.util.calendar.ZoneInfo"); - } catch (ClassNotFoundException cnfe) { - return null; - } - - Method method; - try { - method = zoneInfo.getDeclaredMethod("getAvailableIDs", new Class[0]); - } catch (NoSuchMethodException nsme) { - return null; - } - - Object result; - try { - result = method.invoke((Object) null); - } catch (IllegalAccessException iae) { - return null; - } catch (InvocationTargetException ite) { - return null; - } - - String[] tmpList = (String[]) result; - - int numSaved = 0; - String[] finalList = null; - - for (int i = 0; i < 2; i++) { - if (i > 0) { - if (numSaved == 0) { - return null; - } - - finalList = new String[numSaved]; - numSaved = 0; - } - - for (int j = 0; j < tmpList.length; j++) { - final int len = tmpList[j].length(); - if ((len > 2 && Character.isUpperCase(tmpList[j].charAt(1))) - && (len != 7 || !Character.isDigit(tmpList[j].charAt(3)))) { - if (finalList == null) { - numSaved++; - } else { - finalList[numSaved++] = tmpList[j]; - } - - if (len == 3 && tmpList[j].charAt(1) == 'S' - && tmpList[j].charAt(2) == 'T') { - if (finalList == null) { - numSaved++; - } else { - StringBuffer dst = new StringBuffer(); - dst.append(tmpList[j].charAt(0)); - dst.append("DT"); - finalList[numSaved++] = dst.toString(); - } - } - } - } - } - - return finalList; - } - - /** - * Convert the supplied month name to its numeric representation.
- * For example, "January" (or any substring) would return - * 1 and "December" would return 12. - * - * @param str - * month name - * - * @return the numeric month, or CalendarParser.UNSET if the - * supplied string is not a valid month name. - */ - public static int monthNameToNumber(String str) { - if (str != null && str.length() >= 3) { - String lstr = str.toLowerCase(); - for (int i = 0; i < MONTHS.length; i++) { - if (lstr.startsWith(MONTHS[i][0]) - || MONTHS[i][1].toLowerCase().startsWith(lstr)) { - return i + 1; - } - } - } - - return UNSET; - } - - /** - * Extract a date from a string, defaulting to YY-MM-DD order for - * all-numeric strings. - * - * @param dateStr - * date string - * - * @return parsed date - * - * @throws CalendarParserException - * if there was a problem parsing the string. - */ - public static final Calendar parse(String dateStr) - throws CalendarParserException { - return parse(dateStr, YY_MM_DD); - } - - /** - * Extract a date from a string. - * - * @param dateStr - * date string - * @param order - * order in which pieces of numeric strings are assigned (should - * be one of YY_MM_DD, MM_DD_YY, etc.) - * - * @return parsed date - * - * @throws CalendarParserException - * if there was a problem parsing the string. - */ - public static final Calendar parse(String dateStr, int order) - throws CalendarParserException { - return parse(dateStr, order, true); - } - - /** - * Extract a date from a string. - * - * @param dateStr - * date string - * @param order - * order in which pieces of numeric strings are assigned (should - * be one of YY_MM_DD, MM_DD_YY, etc.) - * @param ignoreChanges - * if true, ignore date changes such as Feb 31 - * being changed to Mar 3. - * - * @return parsed date - * - * @throws CalendarParserException - * if there was a problem parsing the string. 
- */ - public static final Calendar parse(String dateStr, int order, - boolean ignoreChanges) throws CalendarParserException { - if (dateStr == null) { - return null; - } - - return parseString(dateStr, order, ignoreChanges); - } - - /** - * Parse a non-numeric token from the date string. - * - * @param dateStr - * full date string - * @param state - * parser state - * @param token - * string being parsed - * - * @throws CalendarParserException - * if there was a problem parsing the token - */ - private static final void parseNonNumericToken(String dateStr, - ParserState state, String token) throws CalendarParserException { - // if it's a weekday name, ignore it - if (isWeekdayName(token)) { - if (DEBUG) { - System.err.println("IGNORE \"" + token + "\" (weekday)"); - } - return; - } - - // if it looks like a time, deal with it - if (token.indexOf(':') > 0) { - final char firstChar = token.charAt(0); - if (Character.isDigit(firstChar)) { - parseTime(dateStr, state, token); - return; - } else if (firstChar == '+' || firstChar == '-') { - parseTimeZoneOffset(dateStr, state, token); - return; - } else { - throw new CalendarParserException("Unrecognized time \"" - + token + "\" in date \"" + dateStr + "\""); - } - } - - // try to parse month name - int tmpMon = monthNameToNumber(token); - - // if token isn't a month name ... 
PUKE - if (tmpMon != UNSET) { - - // if month number is unset, set it and move on - if (!state.isMonthSet()) { - state.setMonth(tmpMon); - if (DEBUG) { - System.err.println("MONTH=" - + MONTHS[state.getMonth() - 1][0] + " (" + token - + ") name"); - } - return; - } - - // try to move the current month value to the year or day - if (!state.isYearSet()) { - if (state.isDateSet() || state.isYearBeforeDay()) { - state.setYear(state.getMonth()); - state.setMonth(tmpMon); - if (DEBUG) { - System.err.println("MONTH=" - + MONTHS[state.getMonth() - 1][0] + ", YEAR=" - + state.getYear() + " (" + token - + ") name swap"); - } - } else { - state.setDate(state.getMonth()); - state.setMonth(tmpMon); - if (DEBUG) { - System.err.println("MONTH=" - + MONTHS[state.getMonth() - 1][0] + ", DAY=" - + state.getDate() + " (" + token - + ") name swap"); - } - } - - return; - } - - // year was already set, so try to move month value to day - if (!state.isDateSet()) { - state.setDate(state.getMonth()); - state.setMonth(tmpMon); - if (DEBUG) { - System.err.println("MONTH=" - + MONTHS[state.getMonth() - 1][0] + ", DAY=" - + state.getDate() + " (" + token + ") name swap 2"); - } - - return; - } - - // can't move month value to year or day ... PUKE - if (DEBUG) { - System.err.println("*** Too many numbers in \"" + dateStr - + "\""); - } - throw new CalendarParserException("Too many numbers in" - + " date \"" + dateStr + "\""); - } - - // maybe it's an ordinal number list "1st", "23rd", etc. 
- int val = getOrdinalNumber(token); - if (val == UNSET) { - final String lToken = token.toLowerCase(); - - if (lToken.equals("am")) { - // don't need to do anything - if (DEBUG) { - System.err.println("TIME=AM (" + token + ")"); - } - return; - } else if (lToken.equals("pm")) { - if (!state.isHourSet()) { - state.setTimePostMeridian(true); - } else { - state.setHour(state.getHour() + 12); - } - - if (DEBUG) { - System.err.println("TIME=PM (" + token + ")"); - } - return; - } else if (zoneNames != null) { - // maybe it's a time zone name - for (int z = 0; z < zoneNames.length; z++) { - if (token.equalsIgnoreCase(zoneNames[z])) { - TimeZone tz = TimeZone.getTimeZone(token); - if (tz.getRawOffset() != 0 || lToken.equals("gmt")) { - state.setTimeZone(tz); - return; - } - } - } - } - - if (DEBUG) { - System.err.println("*** Unknown string \"" + token + "\""); - } - throw new CalendarParserException("Unknown string \"" + token - + "\" in date \"" + dateStr + "\""); - } - - // if no day yet, we're done - if (!state.isDateSet()) { - state.setDate(val); - if (DEBUG) { - System.err.println("DAY=" + state.getDate() + " (" + token - + ") ord"); - } - return; - } - - // if either year or month is unset... 
- if (!state.isYearSet() || !state.isMonthSet()) { - - // if day can't be a month, shift it into year - if (state.getDate() > 12) { - if (!state.isYearSet()) { - state.setYear(state.getDate()); - state.setDate(val); - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() + ", DAY=" - + state.getDate() + " (" + token - + ") ord>12 swap"); - } - return; - } - - // year was already set, maybe we can move it to month - if (state.getYear() <= 12) { - state.setMonth(state.getYear()); - state.setYear(state.getDate()); - state.setDate(val); - - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() - + ", MONTH=" + state.getMonth() + ", DAY=" - + state.getDate() + " (" + token - + ") ord megaswap"); - } - - return; - } - - // try to shift day value to either year or month - } else if (!state.isYearSet()) { - if (!state.isMonthSet() && !state.isYearBeforeMonth()) { - state.setMonth(state.getDate()); - state.setDate(val); - if (DEBUG) { - System.err.println("MONTH=" + state.getMonth() - + ", DAY=" + state.getDate() + " (" + token - + ") ord swap"); - } - return; - } - - state.setYear(state.getDate()); - state.setDate(val); - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() + ", DAY=" - + state.getDate() + " (" + token + ") ord swap"); - } - return; - - // year was set, so we know month is unset - } else { - - state.setMonth(state.getDate()); - state.setDate(val); - if (DEBUG) { - System.err.println("MONTH=" + state.getMonth() + ", DAY=" - + state.getDate() + " (" + token + ") ord swap#2"); - } - return; - } - } - - if (DEBUG) { - System.err.println("*** Extra number \"" + token + "\""); - } - throw new CalendarParserException("Cannot assign ordinal in \"" - + dateStr + "\""); - } - - /** - * Split a large numeric value into a year/month/date values. 
- * - * @param dateStr - * full date string - * @param state - * parser state - * @param val - * numeric value to use - * - * @throws CalendarParserException - * if there was a problem splitting the value - */ - private static final void parseNumericBlob(String dateStr, - ParserState state, int val) throws CalendarParserException { - if (state.isYearSet() || state.isMonthSet() || state.isDateSet()) { - throw new CalendarParserException("Unknown value " + val - + " in date \"" + dateStr + "\""); - } - - int tmpVal = val; - if (state.isYearBeforeMonth()) { - if (state.isYearBeforeDay()) { - final int last = tmpVal % 100; - tmpVal /= 100; - - final int middle = tmpVal % 100; - tmpVal /= 100; - - state.setYear(tmpVal); - if (state.isMonthBeforeDay()) { - // YYYYMMDD - state.setMonth(middle); - state.setDate(last); - } else { - // YYYYDDMM - state.setDate(middle); - state.setMonth(last); - } - } else { - // DDYYYYMM - state.setMonth(tmpVal % 100); - tmpVal /= 100; - - state.setYear(tmpVal % 10000); - tmpVal /= 10000; - - state.setDate(tmpVal); - } - } else if (state.isYearBeforeDay()) { - // MMYYYYDD - state.setDate(tmpVal % 100); - tmpVal /= 100; - - state.setYear(tmpVal % 10000); - tmpVal /= 10000; - - state.setMonth(tmpVal); - } else { - state.setYear(tmpVal % 10000); - tmpVal /= 10000; - - final int middle = tmpVal % 100; - tmpVal /= 100; - if (state.isMonthBeforeDay()) { - // MMDDYYYY - state.setDate(middle); - state.setMonth(tmpVal); - } else { - // DDMMYYYY - state.setDate(tmpVal); - state.setMonth(middle); - } - } - - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() + " MONTH=" - + state.getMonth() + " DAY=" + state.getDate() + " (" + val - + ") blob"); - } - } - - /** - * Use a numeric token from the date string. 
- * - * @param dateStr - * full date string - * @param state - * parser state - * @param val - * numeric value to use - * - * @throws CalendarParserException - * if there was a problem parsing the token - */ - private static final void parseNumericToken(String dateStr, - ParserState state, int val) throws CalendarParserException { - // puke if we've already found 3 values - if (state.isYearSet() && state.isMonthSet() && state.isDateSet()) { - if (DEBUG) { - System.err.println("*** Extra number " + val); - } - throw new CalendarParserException("Extra value \"" + val - + "\" in date \"" + dateStr + "\""); - } - - // puke up on negative numbers - if (val < 0) { - if (DEBUG) { - System.err.println("*** Negative number " + val); - } - throw new CalendarParserException("Found negative number in" - + " date \"" + dateStr + "\""); - } - - if (val > 9999) { - parseNumericBlob(dateStr, state, val); - return; - } - - // deal with obvious years first - if (val > 31) { - - // if no year yet, assign it and move on - if (!state.isYearSet()) { - state.setYear(val); - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() + " (" + val - + ") >31"); - } - return; - } - - // puke if the year value can't possibly be a day or month - if (state.getYear() > 31) { - if (DEBUG) { - System.err.println("*** Ambiguous year " + state.getYear() - + " vs. " + val); - } - String errMsg = "Couldn't decide on year number in date \"" - + dateStr + "\""; - throw new CalendarParserException(errMsg); - } - - // if the year value can't be a month... 
- if (state.getYear() > 12) { - - // if day isn't set, use old val as day and new val as year - if (!state.isDateSet()) { - state.setDate(state.getYear()); - state.setYear(val); - - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() + ", DAY=" - + state.getDate() + " (" + val + ") >31 swap"); - } - - return; - } - - // NOTE: both day and year are set - - // try using day value as month so we can move year - // value to day and use new value as year - if (state.getDate() <= 12) { - state.setMonth(state.getDate()); - state.setDate(state.getYear()); - state.setYear(val); - - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() - + ", MONTH=" + state.getMonth() + ", DAY=" - + state.getDate() + " (" + val - + ") >31 megaswap"); - } - - return; - } - - if (DEBUG) { - System.err.println("*** Unassignable year-like" - + " number " + val); - } - throw new CalendarParserException("Bad number " + val - + " found in date \"" + dateStr + "\""); - } - - // NOTE: year <= 12 - - if (!state.isDateSet() && !state.isMonthSet()) { - if (state.isMonthBeforeDay()) { - state.setMonth(state.getYear()); - state.setYear(val); - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() - + ", MONTH=" + state.getMonth() + " (" + val - + ") >31 swap"); - } - } else { - state.setDate(state.getYear()); - state.setYear(val); - if (DEBUG) { - System.err - .println("YEAR=" + state.getYear() + ", DAY=" - + state.getDate() + " (" + val - + ") >31 swap#2"); - } - } - - return; - } - - if (!state.isDateSet()) { - state.setDate(state.getYear()); - state.setYear(val); - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() + ", DAY=" - + state.getDate() + " (" + val + ") >31 day swap"); - } - return; - } - - // assume this was a mishandled month - state.setMonth(state.getYear()); - state.setYear(val); - - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() + ", MONTH=" - + state.getMonth() + " (" + val + ") >31 mon swap"); - } - - return; - } - - // now deal with non-month 
values - if (val > 12) { - - // if no year value yet... - if (!state.isYearSet()) { - - // if the day is set, or if we assign year before day... - if (state.isDateSet() || state.isYearBeforeDay()) { - state.setYear(val); - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() + " (" - + val + ") >12"); - } - } else { - state.setDate(val); - if (DEBUG) { - System.err.println("DAY=" + state.getDate() + " (" - + val + ") >12"); - } - } - - return; - } - - // NOTE: year is set - - // if no day value yet, assign it and move on - if (!state.isDateSet()) { - state.setDate(val); - - if (DEBUG) { - System.err.println("DAY=" + state.getDate() + " (" + val - + ") >12 !yr"); - } - - return; - } - - // NOTE: both year and day are set - - // XXX see if we can shift things around - - if (DEBUG) { - System.err.println("*** Unassignable year/day number " + val); - } - throw new CalendarParserException("Bad number " + val - + " found in date \"" + dateStr + "\""); - } - - // NOTE: ambiguous value - - // if year is set, this must be either the month or day - if (state.isYearSet()) { - if (state.isMonthSet() - || (!state.isDateSet() && !state.isMonthBeforeDay())) { - state.setDate(val); - if (DEBUG) { - System.err.println("DAY=" + state.getDate() + " (" + val - + ") ambig!yr"); - } - } else { - state.setMonth(val); - if (DEBUG) { - System.err.println("MONTH=" + state.getMonth() + " (" + val - + ") ambig!yr"); - } - } - - return; - } - - // NOTE: year not set - - // if month is set, this must be either the year or day - if (state.isMonthSet()) { - if (state.isDateSet() || state.isYearBeforeDay()) { - state.setYear(val); - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() + " (" + val - + ") ambig!mo"); - } - } else { - state.setDate(val); - if (DEBUG) { - System.err.println("DAY=" + state.getDate() + " (" + val - + ") ambig!mo"); - } - } - - return; - } - - // NOTE: neither year nor month is set - - // if day is set, this must be either the year or month - if 
(state.isDateSet()) { - if (state.isYearBeforeMonth()) { - state.setYear(val); - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() + " (" + val - + ") ambig!day"); - } - } else { - state.setMonth(val); - if (DEBUG) { - System.err.println("MONTH=" + state.getMonth() + " (" + val - + ") ambig!day"); - } - } - - return; - } - - // NOTE: no value set yet - if (state.isYearBeforeMonth()) { - if (state.isYearBeforeDay()) { - state.setYear(val); - if (DEBUG) { - System.err.println("YEAR=" + state.getYear() + " (" + val - + ") YM|YD"); - } - } else { - state.setDate(val); - if (DEBUG) { - System.err.println("DAY=" + state.getDate() + " (" + val - + ") YM!YD"); - } - } - } else if (state.isMonthBeforeDay()) { - state.setMonth(val); - if (DEBUG) { - System.err.println("MONTH=" + state.getMonth() + " (" + val - + ") !YM|MD"); - } - } else { - state.setDate(val); - if (DEBUG) { - System.err.println("DAY=" + state.getDate() + " (" + val - + ") !YM!MD"); - } - } - } - - /** - * Extract a date from the supplied string. - * - * @param dateStr - * string to parse - * @param order - * year/month/day order (YY_MM_DD, MM_DD_YY, etc.) - * @param ignoreChanges - * if true, ignore date changes such as Feb 31 - * being changed to Mar 3. - * - * @return parsed date - * - * @throws CalendarParserException - * if no valid date was found. 
- */ - private static final Calendar parseString(String dateStr, int order, - boolean ignoreChanges) throws CalendarParserException { - ParserState state = new ParserState(order); - - Pattern pat = Pattern.compile("([\\s/,]+|(\\S)\\-)"); - Matcher matcher = pat.matcher(dateStr); - - int prevEnd = 0; - while (prevEnd < dateStr.length()) { - String token; - if (!matcher.find()) { - token = dateStr.substring(prevEnd); - prevEnd = dateStr.length(); - } else { - final boolean isMinus = (matcher.groupCount() == 2 && matcher - .group(2) != null); - - if (!isMinus) { - token = dateStr.substring(prevEnd, matcher.start()); - } else { - token = dateStr.substring(prevEnd, matcher.start()) - + matcher.group(2); - } - - prevEnd = matcher.end(); - } - - if (DEBUG) { - System.err.println("YEAR " - + (state.isYearSet() ? Integer - .toString(state.getYear()) : "UNSET") - + ", MONTH " - + (state.isMonthSet() ? Integer.toString(state - .getMonth()) : "UNSET") - + ", DAY " - + (state.isDateSet() ? Integer - .toString(state.getDate()) : "UNSET") - + ", TOKEN=\"" + token + "\""); - } - - // try to decipher next token as a number - try { - final int val = Integer.parseInt(token); - parseNumericToken(dateStr, state, val); - } catch (NumberFormatException e) { - parseNonNumericToken(dateStr, state, token); - } - } - - // before checking for errors, check for missing year - if (!state.isDateSet() && state.getYear() <= 31) { - int tmp = state.getDate(); - state.setDate(state.getYear()); - state.setYear(tmp); - } - - if (!state.isDateSet()) { - if (!state.isMonthSet()) { - if (!state.isYearSet()) { - throw new CalendarParserException("No date found in \"" - + dateStr + "\""); - } else { - throw new CalendarParserException("Day and month missing" - + " from \"" + dateStr + "\""); - } - } else { - throw new CalendarParserException("Day missing from \"" - + dateStr + "\""); - } - } else if (!state.isMonthSet()) { - if (!state.isYearSet()) { - throw new CalendarParserException("Year and month 
missing" - + " from \"" + dateStr + "\""); - } else { - throw new CalendarParserException("Month missing from \"" - + dateStr + "\""); - } - } else if (!state.isYearSet()) { - throw new CalendarParserException("Year missing from \"" + dateStr - + "\""); - } - - final int tmpYear = state.getYear(); - if (tmpYear < 50) { - state.setYear(tmpYear + CENTURY_OFFSET); - } else if (tmpYear < 100) { - state.setYear(tmpYear + (CENTURY_OFFSET - 100)); - } - - GregorianCalendar cal = new GregorianCalendar(); - - state.setCalendar(cal, ignoreChanges); - - if (DEBUG) { - System.err.println("Y" + state.getYear() + " M" + state.getMonth() - + " D" + state.getDate() + " H" + state.getHour() + " M" - + state.getMinute() + " S" + state.getSecond() + " L" - + state.getMillisecond() + " => " + toString(cal)); - } - - return cal; - } - - /** - * Parse a time string. - * - * @param dateStr - * full date string - * @param state - * parser state - * @param timeStr - * string containing colon-separated time - * - * @throws CalendarParserException - * if there is a problem with the time - */ - private static final void parseTime(String dateStr, ParserState state, - String timeStr) throws CalendarParserException { - int place = PLACE_HOUR; - - String tmpTime; - - final char lastChar = timeStr.charAt(timeStr.length() - 1); - if (lastChar != 'm' && lastChar != 'M') { - if (DEBUG) { - System.err.println("No AM/PM in \"" + timeStr + "\" (time)"); - } - tmpTime = timeStr; - } else { - final char preLast = timeStr.charAt(timeStr.length() - 2); - if (preLast == 'a' || preLast == 'A') { - state.setTimePostMeridian(false); - } else if (preLast == 'p' || preLast == 'P') { - state.setTimePostMeridian(true); - } else { - throw new CalendarParserException("Bad time \"" + timeStr - + "\" in date \"" + dateStr + "\""); - } - - tmpTime = timeStr.substring(0, timeStr.length() - 2); - if (DEBUG) { - System.err.println("Found " - + (state.isTimePostMeridian() ? "PM" : "AM") - + ". 
now \"" + tmpTime + "\" (time)"); - } - } - - String[] tList = tmpTime.split("[:\\.]"); - for (int i = 0; i < tList.length; i++) { - String token = tList[i]; - - if (DEBUG) { - System.err.println("HOUR " - + (state.isHourSet() ? Integer - .toString(state.getHour()) : "UNSET") - + ", MINUTE " - + (state.isMinuteSet() ? Integer.toString(state - .getMinute()) : "UNSET") - + ", SECOND " - + (state.isSecondSet() ? Integer.toString(state - .getSecond()) : "UNSET") - + ", MILLISECOND " - + (state.isMillisecondSet() ? Integer.toString(state - .getMillisecond()) : "UNSET") + ", TOKEN=\"" - + token + "\""); - } - - final int val; - try { - val = Integer.parseInt(token); - } catch (NumberFormatException nfe) { - throw new CalendarParserException("Bad " - + getTimePlaceString(place) + " string \"" + token - + "\" in \"" + dateStr + "\""); - } - - switch (place) { - case PLACE_HOUR: - try { - state.setHour(val); - } catch (CalendarParserException dfe) { - throw new CalendarParserException(dfe.getMessage() - + " in \"" + dateStr + "\""); - } - if (DEBUG) { - System.err.println("Set hour to " + val); - } - place = PLACE_MINUTE; - break; - case PLACE_MINUTE: - try { - state.setMinute(val); - } catch (CalendarParserException dfe) { - throw new CalendarParserException(dfe.getMessage() - + " in \"" + dateStr + "\""); - } - if (DEBUG) { - System.err.println("Set minute to " + val); - } - place = PLACE_SECOND; - break; - case PLACE_SECOND: - try { - state.setSecond(val); - } catch (CalendarParserException dfe) { - throw new CalendarParserException(dfe.getMessage() - + " in \"" + dateStr + "\""); - } - if (DEBUG) { - System.err.println("Set second to " + val); - } - place = PLACE_MILLI; - break; - case PLACE_MILLI: - try { - state.setMillisecond(val); - } catch (CalendarParserException dfe) { - throw new CalendarParserException(dfe.getMessage() - + " in \"" + dateStr + "\""); - } - if (DEBUG) { - System.err.println("Set millisecond to " + val); - } - place = PLACE_UNKNOWN; - break; - 
default: - throw new CalendarParserException("Unexpected place value " - + place); - } - } - } - - /** - * Parse a time zone offset string. - * - * @param dateStr - * full date string - * @param state - * parser state - * @param zoneStr - * string containing colon-separated time zone offset - * - * @throws CalendarParserException - * if there is a problem with the time - */ - private static final void parseTimeZoneOffset(String dateStr, - ParserState state, String zoneStr) throws CalendarParserException { - int place = PLACE_HOUR; - - final boolean isNegative = (zoneStr.charAt(0) == '-'); - if (!isNegative && zoneStr.charAt(0) != '+') { - throw new CalendarParserException("Bad time zone offset \"" - + zoneStr + "\" in date \"" + dateStr + "\""); - } - - int hour = UNSET; - int minute = UNSET; - - String[] tList = zoneStr.substring(1).split(":"); - for (int i = 0; i < tList.length; i++) { - String token = tList[i]; - - if (DEBUG) { - System.err - .println("TZ_HOUR " - + (hour != UNSET ? Integer.toString(hour) - : "UNSET") - + ", TZ_MINUTE " - + (minute != UNSET ? Integer.toString(minute) - : "UNSET") + ", TOKEN=\"" + token - + "\""); - } - - final int val; - try { - val = Integer.parseInt(token); - } catch (NumberFormatException nfe) { - throw new CalendarParserException("Bad time zone " - + getTimePlaceString(place) + " offset \"" + token - + "\" in \"" + dateStr + "\""); - } - - switch (place) { - case PLACE_HOUR: - hour = val; - if (DEBUG) { - System.err.println("Set time zone offset hour to " + val); - } - place = PLACE_MINUTE; - break; - case PLACE_MINUTE: - minute = val; - if (DEBUG) { - System.err.println("Set time zone offset minute to " + val); - } - place = PLACE_UNKNOWN; - break; - default: - throw new CalendarParserException("Unexpected place value " - + place); - } - } - - String customID = "GMT" + (isNegative ? "-" : "+") + hour + ":" - + (minute < 10 ? 
"0" : "") + minute; - - state.setTimeZone(TimeZone.getTimeZone(customID)); - } - - /** - * Return a printable representation of the date. - * - * @param cal - * calendar to convert to a string - * - * @return a printable string. - */ - public static final String prettyString(Calendar cal) { - if (cal == null) { - return null; - } - - final int calYear = cal.get(Calendar.YEAR); - final int calMonth = cal.get(Calendar.MONTH); - final int calDay = cal.get(Calendar.DATE); - - boolean needSpace = false; - StringBuffer buf = new StringBuffer(); - - if (calMonth >= 0 && calMonth < MONTHS.length) { - if (needSpace) { - buf.append(' '); - } - buf.append(MONTHS[calMonth][1]); - needSpace = true; - } - if (calDay > 0) { - if (needSpace) { - buf.append(' '); - } - buf.append(calDay); - if (calYear > UNSET) { - buf.append(','); - } - needSpace = true; - } - if (calYear > UNSET) { - if (needSpace) { - buf.append(' '); - } - buf.append(calYear); - } - - appendTimeString(buf, cal, needSpace); - - return buf.toString(); - } - - /** - * Return a basic representation of the string. - * - * @param cal - * calendar to convert to a string - * - * @return the basic string. 
- */ - public static final String toString(Calendar cal) { - if (cal == null) { - return null; - } - - final int calYear = cal.get(Calendar.YEAR); - final int calMonth = cal.get(Calendar.MONTH); - final int calDay = cal.get(Calendar.DATE); - - boolean needSpace = false; - StringBuffer buf = new StringBuffer(); - - if (calDay > 0) { - if (needSpace) { - buf.append(' '); - } - buf.append(calDay); - needSpace = true; - } - if (calMonth >= 0 && calMonth < MONTHS.length) { - if (needSpace) { - buf.append(' '); - } - buf.append(MONTHS[calMonth][1].substring(0, 3)); - needSpace = true; - } - if (calYear > UNSET) { - if (needSpace) { - buf.append(' '); - } - buf.append(calYear); - } - - appendTimeString(buf, cal, needSpace); - - return buf.toString(); - } - - /** - * Return a string representation of the date suitable for use in an SQL - * statement. - * - * @param cal - * calendar to convert to a string - * - * @return the SQL-friendly string. - */ - public static final String toSQLString(Calendar cal) { - if (cal == null) { - return null; - } - - final int calYear = cal.get(Calendar.YEAR); - final int calMonth = cal.get(Calendar.MONTH); - final int calDay = cal.get(Calendar.DATE); - - StringBuffer buf = new StringBuffer(); - - buf.append(calYear); - buf.append('-'); - if ((calMonth + 1) < 10) { - buf.append('0'); - } - buf.append(calMonth + 1); - buf.append('-'); - if (calDay < 10) { - buf.append('0'); - } - buf.append(calDay); - - appendTimeString(buf, cal, true); - - return buf.toString(); - } -} diff --git a/main/src/com/google/gridworks/expr/util/CalendarParserException.java b/main/src/com/google/gridworks/expr/util/CalendarParserException.java deleted file mode 100644 index aec1d7d0c..000000000 --- a/main/src/com/google/gridworks/expr/util/CalendarParserException.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.google.gridworks.expr.util; - -// Taken from http://icecube.wisc.edu/~dglo/software/calparse/index.html -// Copyright Dave Glowacki. 
Released under the BSD license. - -/** - * Thrown when an invalid date is encountered in CalendarParser. - */ -public class CalendarParserException extends Exception { - - private static final long serialVersionUID = 7195725880623801198L; - - /** - * Default date format exception. - */ - public CalendarParserException() { super(); } - - /** - * Date format exception. - * - * @param str error message - */ - public CalendarParserException(String str) { super(str); } -} diff --git a/main/src/com/google/gridworks/gel/Control.java b/main/src/com/google/gridworks/gel/Control.java deleted file mode 100644 index e3ae1b725..000000000 --- a/main/src/com/google/gridworks/gel/Control.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.google.gridworks.gel; - -import java.util.Properties; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.expr.Evaluable; - -/** - * Interface of GEL controls such as if, forEach, forNonBlank, with. A control can - * decide which part of the code to execute and can affect the environment bindings. - * Functions, on the other hand, can't do either. 
- */ -public interface Control extends Jsonizable { - public Object call(Properties bindings, Evaluable[] args); - - public String checkArguments(Evaluable[] args); -} diff --git a/main/src/com/google/gridworks/gel/ControlFunctionRegistry.java b/main/src/com/google/gridworks/gel/ControlFunctionRegistry.java deleted file mode 100644 index cf50eabd7..000000000 --- a/main/src/com/google/gridworks/gel/ControlFunctionRegistry.java +++ /dev/null @@ -1,216 +0,0 @@ -package com.google.gridworks.gel; - -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import java.util.Map.Entry; - -import com.google.gridworks.expr.functions.Cross; -import com.google.gridworks.expr.functions.FacetCount; -import com.google.gridworks.expr.functions.Get; -import com.google.gridworks.expr.functions.HasField; -import com.google.gridworks.expr.functions.Jsonize; -import com.google.gridworks.expr.functions.Length; -import com.google.gridworks.expr.functions.Slice; -import com.google.gridworks.expr.functions.ToDate; -import com.google.gridworks.expr.functions.ToNumber; -import com.google.gridworks.expr.functions.ToString; -import com.google.gridworks.expr.functions.Type; -import com.google.gridworks.expr.functions.arrays.Join; -import com.google.gridworks.expr.functions.arrays.Reverse; -import com.google.gridworks.expr.functions.arrays.Sort; -import com.google.gridworks.expr.functions.arrays.Uniques; -import com.google.gridworks.expr.functions.booleans.And; -import com.google.gridworks.expr.functions.booleans.Not; -import com.google.gridworks.expr.functions.booleans.Or; -import com.google.gridworks.expr.functions.date.DatePart; -import com.google.gridworks.expr.functions.date.Inc; -import com.google.gridworks.expr.functions.date.Now; -import com.google.gridworks.expr.functions.math.Ceil; -import com.google.gridworks.expr.functions.math.Exp; -import com.google.gridworks.expr.functions.math.Floor; -import com.google.gridworks.expr.functions.math.Ln; -import 
com.google.gridworks.expr.functions.math.Log; -import com.google.gridworks.expr.functions.math.Max; -import com.google.gridworks.expr.functions.math.Min; -import com.google.gridworks.expr.functions.math.Mod; -import com.google.gridworks.expr.functions.math.Pow; -import com.google.gridworks.expr.functions.math.Round; -import com.google.gridworks.expr.functions.math.Sum; -import com.google.gridworks.expr.functions.strings.Chomp; -import com.google.gridworks.expr.functions.strings.Contains; -import com.google.gridworks.expr.functions.strings.Diff; -import com.google.gridworks.expr.functions.strings.EndsWith; -import com.google.gridworks.expr.functions.strings.Escape; -import com.google.gridworks.expr.functions.strings.Fingerprint; -import com.google.gridworks.expr.functions.strings.IndexOf; -import com.google.gridworks.expr.functions.strings.LastIndexOf; -import com.google.gridworks.expr.functions.strings.MD5; -import com.google.gridworks.expr.functions.strings.Match; -import com.google.gridworks.expr.functions.strings.NGram; -import com.google.gridworks.expr.functions.strings.NGramFingerprint; -import com.google.gridworks.expr.functions.strings.ParseJson; -import com.google.gridworks.expr.functions.strings.Partition; -import com.google.gridworks.expr.functions.strings.Phonetic; -import com.google.gridworks.expr.functions.strings.RPartition; -import com.google.gridworks.expr.functions.strings.Reinterpret; -import com.google.gridworks.expr.functions.strings.Replace; -import com.google.gridworks.expr.functions.strings.ReplaceChars; -import com.google.gridworks.expr.functions.strings.SHA1; -import com.google.gridworks.expr.functions.strings.SmartSplit; -import com.google.gridworks.expr.functions.strings.Split; -import com.google.gridworks.expr.functions.strings.SplitByCharType; -import com.google.gridworks.expr.functions.strings.SplitByLengths; -import com.google.gridworks.expr.functions.strings.StartsWith; -import com.google.gridworks.expr.functions.strings.ToLowercase; 
-import com.google.gridworks.expr.functions.strings.ToTitlecase; -import com.google.gridworks.expr.functions.strings.ToUppercase; -import com.google.gridworks.expr.functions.strings.Trim; -import com.google.gridworks.expr.functions.strings.Unescape; -import com.google.gridworks.expr.functions.strings.Unicode; -import com.google.gridworks.expr.functions.strings.UnicodeType; -import com.google.gridworks.gel.controls.Filter; -import com.google.gridworks.gel.controls.ForEach; -import com.google.gridworks.gel.controls.ForEachIndex; -import com.google.gridworks.gel.controls.ForNonBlank; -import com.google.gridworks.gel.controls.ForRange; -import com.google.gridworks.gel.controls.If; -import com.google.gridworks.gel.controls.IsBlank; -import com.google.gridworks.gel.controls.IsError; -import com.google.gridworks.gel.controls.IsNonBlank; -import com.google.gridworks.gel.controls.IsNotNull; -import com.google.gridworks.gel.controls.IsNull; -import com.google.gridworks.gel.controls.IsNumeric; -import com.google.gridworks.gel.controls.With; - -public class ControlFunctionRegistry { - - static private Map s_nameToFunction = new HashMap(); - static private Map s_functionToName = new HashMap(); - - static private Map s_nameToControl = new HashMap(); - static private Map s_controlToName = new HashMap(); - - static public Function getFunction(String name) { - return s_nameToFunction.get(name); - } - static public String getFunctionName(Function f) { - return s_functionToName.get(f); - } - static public Set> getFunctionMapping() { - return s_nameToFunction.entrySet(); - } - - static public Control getControl(String name) { - return s_nameToControl.get(name); - } - static public String getControlName(Control f) { - return s_controlToName.get(f); - } - static public Set> getControlMapping() { - return s_nameToControl.entrySet(); - } - - static public void registerFunction(String name, Function f) { - s_nameToFunction.put(name, f); - s_functionToName.put(f, name); - } - - static 
public void registerControl(String name, Control c) { - s_nameToControl.put(name, c); - s_controlToName.put(c, name); - } - - static { - registerFunction("type", new Type()); - - registerFunction("toString", new ToString()); - registerFunction("toNumber", new ToNumber()); - registerFunction("toDate", new ToDate()); - - registerFunction("toUppercase", new ToUppercase()); - registerFunction("toLowercase", new ToLowercase()); - registerFunction("toTitlecase", new ToTitlecase()); - - registerFunction("hasField", new HasField()); - registerFunction("get", new Get()); - registerFunction("slice", new Slice()); - registerFunction("substring", new Slice()); - registerFunction("replace", new Replace()); - registerFunction("replaceChars", new ReplaceChars()); - registerFunction("split", new Split()); - registerFunction("smartSplit", new SmartSplit()); - registerFunction("splitByCharType", new SplitByCharType()); - registerFunction("splitByLengths", new SplitByLengths()); - registerFunction("partition", new Partition()); - registerFunction("rpartition", new RPartition()); - registerFunction("trim", new Trim()); - registerFunction("strip", new Trim()); - registerFunction("contains", new Contains()); - registerFunction("escape", new Escape()); - registerFunction("unescape", new Unescape()); - registerFunction("length", new Length()); - registerFunction("sha1", new SHA1()); - registerFunction("md5", new MD5()); - registerFunction("unicode", new Unicode()); - registerFunction("unicodeType", new UnicodeType()); - registerFunction("diff", new Diff()); - registerFunction("chomp", new Chomp()); - registerFunction("fingerprint", new Fingerprint()); - registerFunction("ngramFingerprint", new NGramFingerprint()); - registerFunction("phonetic", new Phonetic()); - registerFunction("reinterpret", new Reinterpret()); - registerFunction("jsonize", new Jsonize()); - registerFunction("parseJson", new ParseJson()); - registerFunction("ngram", new NGram()); - registerFunction("match", new 
Match()); - - registerFunction("indexOf", new IndexOf()); - registerFunction("lastIndexOf", new LastIndexOf()); - registerFunction("startsWith", new StartsWith()); - registerFunction("endsWith", new EndsWith()); - registerFunction("join", new Join()); - registerFunction("reverse", new Reverse()); - registerFunction("sort", new Sort()); - registerFunction("uniques", new Uniques()); - - registerFunction("now", new Now()); - registerFunction("inc", new Inc()); - registerFunction("datePart", new DatePart()); - - registerFunction("round", new Round()); - registerFunction("floor", new Floor()); - registerFunction("ceil", new Ceil()); - registerFunction("mod", new Mod()); - registerFunction("max", new Max()); - registerFunction("min", new Min()); - registerFunction("log", new Log()); - registerFunction("ln", new Ln()); - registerFunction("pow", new Pow()); - registerFunction("exp", new Exp()); - registerFunction("sum", new Sum()); - - registerFunction("and", new And()); - registerFunction("or", new Or()); - registerFunction("not", new Not()); - - registerFunction("cross", new Cross()); - - registerFunction("facetCount", new FacetCount()); - - registerControl("if", new If()); - registerControl("with", new With()); - registerControl("forEach", new ForEach()); - registerControl("forEachIndex", new ForEachIndex()); - registerControl("forRange", new ForRange()); - registerControl("filter", new Filter()); - registerControl("forNonBlank", new ForNonBlank()); - - registerControl("isNull", new IsNull()); - registerControl("isNotNull", new IsNotNull()); - registerControl("isBlank", new IsBlank()); - registerControl("isNonBlank", new IsNonBlank()); - registerControl("isNumeric", new IsNumeric()); - registerControl("isError", new IsError()); - } -} diff --git a/main/src/com/google/gridworks/gel/Function.java b/main/src/com/google/gridworks/gel/Function.java deleted file mode 100644 index 9aad4ab2a..000000000 --- a/main/src/com/google/gridworks/gel/Function.java +++ /dev/null @@ -1,13 
+0,0 @@ -package com.google.gridworks.gel; - -import java.util.Properties; - -import com.google.gridworks.Jsonizable; - -/** - * Interface for functions. When a function is called, its arguments have already - * been evaluated down into non-error values. - */ -public interface Function extends Jsonizable { - public Object call(Properties bindings, Object[] args); -} diff --git a/main/src/com/google/gridworks/gel/Parser.java b/main/src/com/google/gridworks/gel/Parser.java deleted file mode 100644 index 8a4fcd8de..000000000 --- a/main/src/com/google/gridworks/gel/Parser.java +++ /dev/null @@ -1,291 +0,0 @@ -package com.google.gridworks.gel; - -import java.util.LinkedList; -import java.util.List; -import java.util.regex.Pattern; - -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ParsingException; -import com.google.gridworks.expr.functions.arrays.ArgsToArray; -import com.google.gridworks.gel.Scanner.NumberToken; -import com.google.gridworks.gel.Scanner.RegexToken; -import com.google.gridworks.gel.Scanner.Token; -import com.google.gridworks.gel.Scanner.TokenType; -import com.google.gridworks.gel.ast.ControlCallExpr; -import com.google.gridworks.gel.ast.FieldAccessorExpr; -import com.google.gridworks.gel.ast.FunctionCallExpr; -import com.google.gridworks.gel.ast.LiteralExpr; -import com.google.gridworks.gel.ast.OperatorCallExpr; -import com.google.gridworks.gel.ast.VariableExpr; - -public class Parser { - protected Scanner _scanner; - protected Token _token; - protected Evaluable _root; - - public Parser(String s) throws ParsingException { - this(s, 0, s.length()); - } - - public Parser(String s, int from, int to) throws ParsingException { - _scanner = new Scanner(s, from, to); - _token = _scanner.next(true); - - _root = parseExpression(); - } - - public Evaluable getExpression() { - return _root; - } - - protected void next(boolean regexPossible) { - _token = _scanner.next(regexPossible); - } - - protected ParsingException 
makeException(String desc) { - int index = _token != null ? _token.start : _scanner.getIndex(); - - return new ParsingException("Parsing error at offset " + index + ": " + desc); - } - - /** - * := - * | [ "<" "<=" ">" ">=" "==" "!=" ] - */ - protected Evaluable parseExpression() throws ParsingException { - Evaluable sub = parseSubExpression(); - - while (_token != null && - _token.type == TokenType.Operator && - ">=<==!=".indexOf(_token.text) >= 0) { - - String op = _token.text; - - next(true); - - Evaluable sub2 = parseSubExpression(); - - sub = new OperatorCallExpr(new Evaluable[] { sub, sub2 }, op); - } - - return sub; - } - - /** - * := - * | [ "+" "-" ] - */ - protected Evaluable parseSubExpression() throws ParsingException { - Evaluable sub = parseTerm(); - - while (_token != null && - _token.type == TokenType.Operator && - "+-".indexOf(_token.text) >= 0) { - - String op = _token.text; - - next(true); - - Evaluable sub2 = parseSubExpression(); - - sub = new OperatorCallExpr(new Evaluable[] { sub, sub2 }, op); - } - - return sub; - } - - /** - * := - * | [ "*" "/" ] - */ - protected Evaluable parseTerm() throws ParsingException { - Evaluable factor = parseFactor(); - - while (_token != null && - _token.type == TokenType.Operator && - "*/".indexOf(_token.text) >= 0) { - - String op = _token.text; - - next(true); - - Evaluable factor2 = parseFactor(); - - factor = new OperatorCallExpr(new Evaluable[] { factor, factor2 }, op); - } - - return factor; - } - - /** - * := ( )* - * := - * | | - | | | - * ( ) - * - * := "[" "]" - * | "." - * | "." 
"(" ")" - * - */ - protected Evaluable parseFactor() throws ParsingException { - if (_token == null) { - throw makeException("Expecting something more at end of expression"); - } - - Evaluable eval = null; - - if (_token.type == TokenType.String) { - eval = new LiteralExpr(_token.text); - next(false); - } else if (_token.type == TokenType.Regex) { - RegexToken t = (RegexToken) _token; - - try { - Pattern pattern = Pattern.compile(_token.text, t.caseInsensitive ? Pattern.CASE_INSENSITIVE : 0); - eval = new LiteralExpr(pattern); - next(false); - } catch (Exception e) { - throw makeException("Bad regular expression (" + e.getMessage() + ")"); - } - } else if (_token.type == TokenType.Number) { - eval = new LiteralExpr(((NumberToken)_token).value); - next(false); - } else if (_token.type == TokenType.Operator && _token.text.equals("-")) { // unary minus? - next(true); - - if (_token != null && _token.type == TokenType.Number) { - eval = new LiteralExpr(-((NumberToken)_token).value); - next(false); - } else { - throw makeException("Bad negative number"); - } - } else if (_token.type == TokenType.Identifier) { - String text = _token.text; - next(false); - - if (_token == null || _token.type != TokenType.Delimiter || !_token.text.equals("(")) { - eval = "null".equals(text) ? 
new LiteralExpr(null) : new VariableExpr(text); - } else { - Function f = ControlFunctionRegistry.getFunction(text); - Control c = ControlFunctionRegistry.getControl(text); - if (f == null && c == null) { - throw makeException("Unknown function or control named " + text); - } - - next(true); // swallow ( - - List args = parseExpressionList(")"); - - if (c != null) { - Evaluable[] argsA = makeArray(args); - String errorMessage = c.checkArguments(argsA); - if (errorMessage != null) { - throw makeException(errorMessage); - } - eval = new ControlCallExpr(argsA, c); - } else { - eval = new FunctionCallExpr(makeArray(args), f); - } - } - } else if (_token.type == TokenType.Delimiter && _token.text.equals("(")) { - next(true); - - eval = parseExpression(); - - if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals(")")) { - next(false); - } else { - throw makeException("Missing )"); - } - } else if (_token.type == TokenType.Delimiter && _token.text.equals("[")) { // [ ... ] array - next(true); // swallow [ - - List args = parseExpressionList("]"); - - eval = new FunctionCallExpr(makeArray(args), new ArgsToArray()); - } else { - throw makeException("Missing number, string, identifier, regex, or parenthesized expression"); - } - - while (_token != null) { - if (_token.type == TokenType.Operator && _token.text.equals(".")) { - next(false); // swallow . 
- - if (_token == null || _token.type != TokenType.Identifier) { - throw makeException("Missing function name"); - } - - String identifier = _token.text; - next(false); - - if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals("(")) { - next(true); // swallow ( - - Function f = ControlFunctionRegistry.getFunction(identifier); - if (f == null) { - throw makeException("Unknown function " + identifier); - } - - List args = parseExpressionList(")"); - args.add(0, eval); - - eval = new FunctionCallExpr(makeArray(args), f); - } else { - eval = new FieldAccessorExpr(eval, identifier); - } - } else if (_token.type == TokenType.Delimiter && _token.text.equals("[")) { - next(true); // swallow [ - - List args = parseExpressionList("]"); - args.add(0, eval); - - eval = new FunctionCallExpr(makeArray(args), ControlFunctionRegistry.getFunction("get")); - } else { - break; - } - } - - return eval; - } - - /** - * := - * | ( "," )* - * - */ - protected List parseExpressionList(String closingDelimiter) throws ParsingException { - List l = new LinkedList(); - - if (_token != null && - (_token.type != TokenType.Delimiter || !_token.text.equals(closingDelimiter))) { - - while (_token != null) { - Evaluable eval = parseExpression(); - - l.add(eval); - - if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals(",")) { - next(true); // swallow comma, loop back for more - } else { - break; - } - } - } - - if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals(closingDelimiter)) { - next(false); // swallow closing delimiter - } else { - throw makeException("Missing " + closingDelimiter); - } - - return l; - } - - protected Evaluable[] makeArray(List l) { - Evaluable[] a = new Evaluable[l.size()]; - l.toArray(a); - - return a; - } -} diff --git a/main/src/com/google/gridworks/gel/Scanner.java b/main/src/com/google/gridworks/gel/Scanner.java deleted file mode 100644 index a694407b8..000000000 --- 
a/main/src/com/google/gridworks/gel/Scanner.java +++ /dev/null @@ -1,304 +0,0 @@ -package com.google.gridworks.gel; - -public class Scanner { - static public enum TokenType { - Error, - Delimiter, - Operator, - Identifier, - Number, - String, - Regex - } - - static public class Token { - final public int start; - final public int end; - final public TokenType type; - final public String text; - - Token(int start, int end, TokenType type, String text) { - this.start = start; - this.end = end; - this.type = type; - this.text = text; - } - } - - static public class ErrorToken extends Token { - final public String detail; // error detail - - public ErrorToken(int start, int end, String text, String detail) { - super(start, end, TokenType.Error, text); - this.detail = detail; - } - } - - static public class NumberToken extends Token { - final public double value; - - public NumberToken(int start, int end, String text, double value) { - super(start, end, TokenType.Number, text); - this.value = value; - } - } - - static public class RegexToken extends Token { - final public boolean caseInsensitive; - - public RegexToken(int start, int end, String text, boolean caseInsensitive) { - super(start, end, TokenType.Regex, text); - this.caseInsensitive = caseInsensitive; - } - } - - protected String _text; // input text to tokenize - protected int _index; // index of the next character to process - protected int _limit; // process up to this index - - public Scanner(String s) { - this(s, 0, s.length()); - } - - public Scanner(String s, int from, int to) { - _text = s; - _index = from; - _limit = to; - } - - public int getIndex() { - return _index; - } - - /** - * The regexPossible flag is used by the parser to hint the scanner what to do - * when it encounters a slash. 
Since the divide operator / and the opening - * delimiter of a regex literal are the same, but divide operators and regex - * literals can't occur at the same place in an expression, this flag is a cheap - * way to distinguish the two without having to look ahead. - * - * @param regexPossible - * @return - */ - public Token next(boolean regexPossible) { - // skip whitespace - while (_index < _limit && Character.isWhitespace(_text.charAt(_index))) { - _index++; - } - if (_index == _limit) { - return null; - } - - char c = _text.charAt(_index); - int start = _index; - String detail = null; - - if (Character.isDigit(c)) { // number literal - double value = 0; - - while (_index < _limit && Character.isDigit(c = _text.charAt(_index))) { - value = value * 10 + (c - '0'); - _index++; - } - - if (_index < _limit && c == '.') { - _index++; - - double division = 1; - while (_index < _limit && Character.isDigit(c = _text.charAt(_index))) { - value = value * 10 + (c - '0'); - division *= 10; - _index++; - } - - value /= division; - } - - // TODO: support exponent e notation - - return new NumberToken( - start, - _index, - _text.substring(start, _index), - value - ); - } else if (c == '"' || c == '\'') { - /* - * String Literal - */ - - StringBuffer sb = new StringBuffer(); - char delimiter = c; - - _index++; // skip opening delimiter - - while (_index < _limit) { - c = _text.charAt(_index); - if (c == delimiter) { - _index++; // skip closing delimiter - - return new Token( - start, - _index, - TokenType.String, - sb.toString() - ); - } else if (c == '\\') { - _index++; // skip escaping marker - if (_index < _limit) { - char c2 = _text.charAt(_index); - if (c2 == 't') { - sb.append('\t'); - } else if (c2 == 'n') { - sb.append('\n'); - } else if (c2 == 'r') { - sb.append('\r'); - } else if (c2 == '\\') { - sb.append('\\'); - } else { - sb.append(c2); - } - } - } else { - sb.append(c); - } - _index++; - } - - detail = "String not properly closed"; - // fall through - - } else if 
(Character.isLetter(c) || c == '_') { // identifier - while (_index < _limit) { - char c1 = _text.charAt(_index); - if (c1 == '_' || Character.isLetterOrDigit(c1)) { - _index++; - } else { - break; - } - } - - return new Token( - start, - _index, - TokenType.Identifier, - _text.substring(start, _index) - ); - } else if (c == '/' && regexPossible) { - /* - * Regex literal - */ - StringBuffer sb = new StringBuffer(); - - _index++; // skip opening delimiter - - while (_index < _limit) { - c = _text.charAt(_index); - if (c == '/') { - _index++; // skip closing delimiter - - boolean caseInsensitive = false; - if (_index < _limit && _text.charAt(_index) == 'i') { - caseInsensitive = true; - _index++; - } - - return new RegexToken( - start, - _index, - sb.toString(), - caseInsensitive - ); - } else if (c == '\\') { - sb.append(c); - - _index++; // skip escaping marker - if (_index < _limit) { - sb.append(_text.charAt(_index)); - } - } else { - sb.append(c); - } - _index++; - } - - detail = "Regex not properly closed"; - // fall through - } else if ("+-*/.".indexOf(c) >= 0) { // operator - _index++; - - return new Token( - start, - _index, - TokenType.Operator, - _text.substring(start, _index) - ); - } else if ("()[],".indexOf(c) >= 0) { // delimiter - _index++; - - return new Token( - start, - _index, - TokenType.Delimiter, - _text.substring(start, _index) - ); - } else if (c == '!' 
&& _index < _limit - 1 && _text.charAt(_index + 1) == '=') { - _index += 2; - return new Token( - start, - _index, - TokenType.Operator, - _text.substring(start, _index) - ); - } else if (c == '<') { - if (_index < _limit - 1 && - (_text.charAt(_index + 1) == '=' || - _text.charAt(_index + 1) == '>')) { - - _index += 2; - return new Token( - start, - _index, - TokenType.Operator, - _text.substring(start, _index) - ); - } else { - _index++; - return new Token( - start, - _index, - TokenType.Operator, - _text.substring(start, _index) - ); - } - } else if (">=".indexOf(c) >= 0) { // operator - if (_index < _limit - 1 && _text.charAt(_index + 1) == '=') { - _index += 2; - return new Token( - start, - _index, - TokenType.Operator, - _text.substring(start, _index) - ); - } else { - _index++; - return new Token( - start, - _index, - TokenType.Operator, - _text.substring(start, _index) - ); - } - } else { - _index++; - detail = "Unrecognized symbol"; - } - - return new ErrorToken( - start, - _index, - _text.substring(start, _index), - detail - ); - } -} diff --git a/main/src/com/google/gridworks/gel/ast/ControlCallExpr.java b/main/src/com/google/gridworks/gel/ast/ControlCallExpr.java deleted file mode 100644 index faae31a0f..000000000 --- a/main/src/com/google/gridworks/gel/ast/ControlCallExpr.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.google.gridworks.gel.ast; - -import java.util.Properties; - -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.gel.Control; - -/** - * An abstract syntax tree node encapsulating a control call, such as "if". 
- */ -public class ControlCallExpr implements Evaluable { - final protected Evaluable[] _args; - final protected Control _control; - - public ControlCallExpr(Evaluable[] args, Control c) { - _args = args; - _control = c; - } - - public Object evaluate(Properties bindings) { - return _control.call(bindings, _args); - } - - @Override - public String toString() { - StringBuffer sb = new StringBuffer(); - - for (Evaluable ev : _args) { - if (sb.length() > 0) { - sb.append(", "); - } - sb.append(ev.toString()); - } - - return _control.getClass().getSimpleName() + "(" + sb.toString() + ")"; - } -} diff --git a/main/src/com/google/gridworks/gel/ast/FieldAccessorExpr.java b/main/src/com/google/gridworks/gel/ast/FieldAccessorExpr.java deleted file mode 100644 index 4863d432b..000000000 --- a/main/src/com/google/gridworks/gel/ast/FieldAccessorExpr.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.google.gridworks.gel.ast; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.expr.HasFields; - -/** - * An abstract syntax tree node encapsulating a field accessor, - * e.g., "cell.value" is accessing the field named "value" on the - * variable called "cell". 
- */ -public class FieldAccessorExpr implements Evaluable { - final protected Evaluable _inner; - final protected String _fieldName; - - public FieldAccessorExpr(Evaluable inner, String fieldName) { - _inner = inner; - _fieldName = fieldName; - } - - public Object evaluate(Properties bindings) { - Object o = _inner.evaluate(bindings); - if (ExpressionUtils.isError(o)) { - return o; // bubble the error up - } else if (o == null) { - return new EvalError("Cannot retrieve field from null"); - } else if (o instanceof HasFields) { - return ((HasFields) o).getField(_fieldName, bindings); - } else if (o instanceof JSONObject) { - try { - return ((JSONObject) o).get(_fieldName); - } catch (JSONException e) { - return new EvalError("Object does not have any field, including " + _fieldName); - } - } else { - return new EvalError("Object does not have any field, including " + _fieldName); - } - } - - @Override - public String toString() { - return _inner.toString() + "." + _fieldName; - } -} diff --git a/main/src/com/google/gridworks/gel/ast/FunctionCallExpr.java b/main/src/com/google/gridworks/gel/ast/FunctionCallExpr.java deleted file mode 100644 index c403696aa..000000000 --- a/main/src/com/google/gridworks/gel/ast/FunctionCallExpr.java +++ /dev/null @@ -1,48 +0,0 @@ -package com.google.gridworks.gel.ast; - -import java.util.Properties; - -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.gel.Function; - -/** - * An abstract syntax tree node encapsulating a function call. The function's - * arguments are all evaluated down to values before the function is applied. - * If any argument is an error, the function is not applied, and the error is - * the result of the expression. 
- */ -public class FunctionCallExpr implements Evaluable { - final protected Evaluable[] _args; - final protected Function _function; - - public FunctionCallExpr(Evaluable[] args, Function f) { - _args = args; - _function = f; - } - - public Object evaluate(Properties bindings) { - Object[] args = new Object[_args.length]; - for (int i = 0; i < _args.length; i++) { - Object v = _args[i].evaluate(bindings); - if (ExpressionUtils.isError(v)) { - return v; // bubble up the error - } - args[i] = v; - } - return _function.call(bindings, args); - } - - public String toString() { - StringBuffer sb = new StringBuffer(); - - for (Evaluable ev : _args) { - if (sb.length() > 0) { - sb.append(", "); - } - sb.append(ev.toString()); - } - - return _function.getClass().getSimpleName() + "(" + sb.toString() + ")"; - } -} diff --git a/main/src/com/google/gridworks/gel/ast/LiteralExpr.java b/main/src/com/google/gridworks/gel/ast/LiteralExpr.java deleted file mode 100644 index 4b8e0d5ea..000000000 --- a/main/src/com/google/gridworks/gel/ast/LiteralExpr.java +++ /dev/null @@ -1,26 +0,0 @@ -package com.google.gridworks.gel.ast; - -import java.util.Properties; - -import org.json.JSONObject; - -import com.google.gridworks.expr.Evaluable; - -/** - * An abstract syntax tree node encapsulating a literal value. - */ -public class LiteralExpr implements Evaluable { - final protected Object _value; - - public LiteralExpr(Object value) { - _value = value; - } - - public Object evaluate(Properties bindings) { - return _value; - } - - public String toString() { - return _value instanceof String ? 
JSONObject.quote((String) _value) : _value.toString(); - } -} diff --git a/main/src/com/google/gridworks/gel/ast/OperatorCallExpr.java b/main/src/com/google/gridworks/gel/ast/OperatorCallExpr.java deleted file mode 100644 index 56fe8c91a..000000000 --- a/main/src/com/google/gridworks/gel/ast/OperatorCallExpr.java +++ /dev/null @@ -1,89 +0,0 @@ -package com.google.gridworks.gel.ast; - -import java.util.Properties; - -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; - -/** - * An abstract syntax tree node encapsulating an operator call, such as "+". - */ -public class OperatorCallExpr implements Evaluable { - final protected Evaluable[] _args; - final protected String _op; - - public OperatorCallExpr(Evaluable[] args, String op) { - _args = args; - _op = op; - } - - public Object evaluate(Properties bindings) { - Object[] args = new Object[_args.length]; - for (int i = 0; i < _args.length; i++) { - Object v = _args[i].evaluate(bindings); - if (ExpressionUtils.isError(v)) { - return v; - } - args[i] = v; - } - - if (args.length == 2) { - if (args[0] != null && args[1] != null) { - if (args[0] instanceof Number && args[1] instanceof Number) { - if ("+".equals(_op)) { - return ((Number) args[0]).doubleValue() + ((Number) args[1]).doubleValue(); - } else if ("-".equals(_op)) { - return ((Number) args[0]).doubleValue() - ((Number) args[1]).doubleValue(); - } else if ("*".equals(_op)) { - return ((Number) args[0]).doubleValue() * ((Number) args[1]).doubleValue(); - } else if ("/".equals(_op)) { - return ((Number) args[0]).doubleValue() / ((Number) args[1]).doubleValue(); - } else if (">".equals(_op)) { - return ((Number) args[0]).doubleValue() > ((Number) args[1]).doubleValue(); - } else if (">=".equals(_op)) { - return ((Number) args[0]).doubleValue() >= ((Number) args[1]).doubleValue(); - } else if ("<".equals(_op)) { - return ((Number) args[0]).doubleValue() < ((Number) args[1]).doubleValue(); - } else if ("<=".equals(_op)) 
{ - return ((Number) args[0]).doubleValue() <= ((Number) args[1]).doubleValue(); - } - } - - if ("+".equals(_op)) { - return args[0].toString() + args[1].toString(); - } - } - - if ("==".equals(_op)) { - if (args[0] != null) { - return args[0].equals(args[1]); - } else { - return args[1] == null; - } - } else if ("!=".equals(_op)) { - if (args[0] != null) { - return !args[0].equals(args[1]); - } else { - return args[1] != null; - } - } - } - return null; - } - - @Override - public String toString() { - StringBuffer sb = new StringBuffer(); - - for (Evaluable ev : _args) { - if (sb.length() > 0) { - sb.append(' '); - sb.append(_op); - sb.append(' '); - } - sb.append(ev.toString()); - } - - return sb.toString(); - } -} diff --git a/main/src/com/google/gridworks/gel/ast/VariableExpr.java b/main/src/com/google/gridworks/gel/ast/VariableExpr.java deleted file mode 100644 index 1bafa6568..000000000 --- a/main/src/com/google/gridworks/gel/ast/VariableExpr.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.google.gridworks.gel.ast; - -import java.util.Properties; - -import com.google.gridworks.expr.Evaluable; - -/** - * An abstract syntax tree node encapsulating the retrieval of a variable's content. 
- */ -public class VariableExpr implements Evaluable { - final protected String _name; - - public VariableExpr(String name) { - _name = name; - } - - public Object evaluate(Properties bindings) { - return bindings.get(_name); - } - - public String toString() { - return _name; - } - - public String getName() { - return _name; - } -} diff --git a/main/src/com/google/gridworks/gel/controls/Filter.java b/main/src/com/google/gridworks/gel/controls/Filter.java deleted file mode 100644 index d77532b64..000000000 --- a/main/src/com/google/gridworks/gel/controls/Filter.java +++ /dev/null @@ -1,114 +0,0 @@ -package com.google.gridworks.gel.controls; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.gel.Control; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.ast.VariableExpr; - -public class Filter implements Control { - public String checkArguments(Evaluable[] args) { - if (args.length != 3) { - return ControlFunctionRegistry.getControlName(this) + " expects 3 arguments"; - } else if (!(args[1] instanceof VariableExpr)) { - return ControlFunctionRegistry.getControlName(this) + - " expects second argument to be a variable name"; - } - return null; - } - - public Object call(Properties bindings, Evaluable[] args) { - Object o = args[0].evaluate(bindings); - if (ExpressionUtils.isError(o)) { - return o; - } else if (!ExpressionUtils.isArrayOrCollection(o) && !(o instanceof JSONArray)) { - return new EvalError("First argument is not an array"); - } - - String name = ((VariableExpr) args[1]).getName(); - - Object oldValue = bindings.get(name); - try { - List results = null; - - if (o.getClass().isArray()) { - 
Object[] values = (Object[]) o; - - results = new ArrayList(values.length); - for (Object v : values) { - bindings.put(name, v); - - Object r = args[2].evaluate(bindings); - if (r instanceof Boolean && ((Boolean) r).booleanValue()) { - results.add(v); - } - } - } else if (o instanceof JSONArray) { - JSONArray a = (JSONArray) o; - int l = a.length(); - - results = new ArrayList(l); - for (int i = 0; i < l; i++) { - try { - Object v = a.get(i); - - bindings.put(name, v); - - Object r = args[2].evaluate(bindings); - if (r instanceof Boolean && ((Boolean) r).booleanValue()) { - results.add(v); - } - } catch (JSONException e) { - results.add(new EvalError(e.getMessage())); - } - } - } else { - Collection collection = ExpressionUtils.toObjectCollection(o); - - results = new ArrayList(collection.size()); - - for (Object v : collection) { - bindings.put(name, v); - - Object r = args[2].evaluate(bindings); - if (r instanceof Boolean && ((Boolean) r).booleanValue()) { - results.add(v); - } - } - } - - return results.toArray(); - } finally { - /* - * Restore the old value bound to the variable, if any. - */ - if (oldValue != null) { - bindings.put(name, oldValue); - } else { - bindings.remove(name); - } - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value( - "Evaluates expression a to an array. Then for each array element, binds its value to variable name v, evaluates expression test which should return a boolean. If the boolean is true, pushes v onto the result array." 
- ); - writer.key("params"); writer.value("expression a, variable v, expression test"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/gel/controls/ForEach.java b/main/src/com/google/gridworks/gel/controls/ForEach.java deleted file mode 100644 index 653dd1389..000000000 --- a/main/src/com/google/gridworks/gel/controls/ForEach.java +++ /dev/null @@ -1,111 +0,0 @@ -package com.google.gridworks.gel.controls; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.gel.Control; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.ast.VariableExpr; - -public class ForEach implements Control { - public String checkArguments(Evaluable[] args) { - if (args.length != 3) { - return ControlFunctionRegistry.getControlName(this) + " expects 3 arguments"; - } else if (!(args[1] instanceof VariableExpr)) { - return ControlFunctionRegistry.getControlName(this) + - " expects second argument to be a variable name"; - } - return null; - } - - public Object call(Properties bindings, Evaluable[] args) { - Object o = args[0].evaluate(bindings); - if (ExpressionUtils.isError(o)) { - return o; - } else if (!ExpressionUtils.isArrayOrCollection(o) && !(o instanceof JSONArray)) { - return new EvalError("First argument to forEach is not an array"); - } - - String name = ((VariableExpr) args[1]).getName(); - - Object oldValue = bindings.get(name); - try { - List results = null; - - if (o.getClass().isArray()) { - Object[] values = (Object[]) o; - - results = new ArrayList(values.length); - for (Object v : values) { - bindings.put(name, v); - - Object r = 
args[2].evaluate(bindings); - - results.add(r); - } - } else if (o instanceof JSONArray) { - JSONArray a = (JSONArray) o; - int l = a.length(); - - results = new ArrayList(l); - for (int i = 0; i < l; i++) { - try { - Object v = a.get(i); - - bindings.put(name, v); - - Object r = args[2].evaluate(bindings); - - results.add(r); - } catch (JSONException e) { - results.add(new EvalError(e.getMessage())); - } - } - } else { - Collection collection = ExpressionUtils.toObjectCollection(o); - - results = new ArrayList(collection.size()); - - for (Object v : collection) { - bindings.put(name, v); - - Object r = args[2].evaluate(bindings); - - results.add(r); - } - } - - return results.toArray(); - } finally { - /* - * Restore the old value bound to the variable, if any. - */ - if (oldValue != null) { - bindings.put(name, oldValue); - } else { - bindings.remove(name); - } - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value( - "Evaluates expression a to an array. Then for each array element, binds its value to variable name v, evaluates expression e, and pushes the result onto the result array." 
- ); - writer.key("params"); writer.value("expression a, variable v, expression e"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/gel/controls/ForEachIndex.java b/main/src/com/google/gridworks/gel/controls/ForEachIndex.java deleted file mode 100644 index e0b296e55..000000000 --- a/main/src/com/google/gridworks/gel/controls/ForEachIndex.java +++ /dev/null @@ -1,128 +0,0 @@ -package com.google.gridworks.gel.controls; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.gel.Control; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.ast.VariableExpr; - -public class ForEachIndex implements Control { - public String checkArguments(Evaluable[] args) { - if (args.length != 4) { - return ControlFunctionRegistry.getControlName(this) + " expects 4 arguments"; - } else if (!(args[1] instanceof VariableExpr)) { - return ControlFunctionRegistry.getControlName(this) + - " expects second argument to be the index's variable name"; - } else if (!(args[2] instanceof VariableExpr)) { - return ControlFunctionRegistry.getControlName(this) + - " expects third argument to be the element's variable name"; - } - return null; - } - - public Object call(Properties bindings, Evaluable[] args) { - Object o = args[0].evaluate(bindings); - if (ExpressionUtils.isError(o)) { - return o; - } else if (!ExpressionUtils.isArrayOrCollection(o) && !(o instanceof JSONArray)) { - return new EvalError("First argument to forEach is not an array"); - } - - String indexName = ((VariableExpr) args[1]).getName(); - String elementName = ((VariableExpr) args[2]).getName(); - - Object oldIndexValue = 
bindings.get(indexName); - Object oldElementValue = bindings.get(elementName); - try { - List results = null; - - if (o.getClass().isArray()) { - Object[] values = (Object[]) o; - - results = new ArrayList(values.length); - - for (int i = 0; i < values.length; i++) { - Object v = values[i]; - - bindings.put(indexName, i); - bindings.put(elementName, v); - - Object r = args[3].evaluate(bindings); - - results.add(r); - } - } else if (o instanceof JSONArray) { - JSONArray a = (JSONArray) o; - int l = a.length(); - - results = new ArrayList(l); - for (int i = 0; i < l; i++) { - try { - Object v = a.get(i); - - bindings.put(indexName, i); - bindings.put(elementName, v); - - Object r = args[3].evaluate(bindings); - - results.add(r); - } catch (JSONException e) { - results.add(new EvalError(e.getMessage())); - } - } - } else { - List list = ExpressionUtils.toObjectList(o); - - results = new ArrayList(list.size()); - - for (int i = 0; i < list.size(); i++) { - Object v = list.get(i); - - bindings.put(indexName, i); - bindings.put(elementName, v); - - Object r = args[3].evaluate(bindings); - - results.add(r); - } - } - - return results.toArray(); - } finally { - /* - * Restore the old values bound to the variables, if any. - */ - if (oldIndexValue != null) { - bindings.put(indexName, oldIndexValue); - } else { - bindings.remove(indexName); - } - if (oldElementValue != null) { - bindings.put(elementName, oldElementValue); - } else { - bindings.remove(elementName); - } - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value( - "Evaluates expression a to an array. Then for each array element, binds its index to variable i and its value to variable name v, evaluates expression e, and pushes the result onto the result array." 
- ); - writer.key("params"); writer.value("expression a, variable i, variable v, expression e"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/gel/controls/ForNonBlank.java b/main/src/com/google/gridworks/gel/controls/ForNonBlank.java deleted file mode 100644 index 013f7dc7c..000000000 --- a/main/src/com/google/gridworks/gel/controls/ForNonBlank.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.google.gridworks.gel.controls; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.gel.Control; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.ast.VariableExpr; - -public class ForNonBlank implements Control { - public String checkArguments(Evaluable[] args) { - if (args.length != 4) { - return ControlFunctionRegistry.getControlName(this) + " expects 4 arguments"; - } else if (!(args[1] instanceof VariableExpr)) { - return ControlFunctionRegistry.getControlName(this) + - " expects second argument to be a variable name"; - } - return null; - } - - public Object call(Properties bindings, Evaluable[] args) { - Object o = args[0].evaluate(bindings); - - Evaluable var = args[1]; - String name = ((VariableExpr) var).getName(); - - if (ExpressionUtils.isNonBlankData(o)) { - Object oldValue = bindings.get(name); - bindings.put(name, o); - - try { - return args[2].evaluate(bindings); - } finally { - /* - * Restore the old value bound to the variable, if any. - */ - if (oldValue != null) { - bindings.put(name, oldValue); - } else { - bindings.remove(name); - } - } - } else { - return args[3].evaluate(bindings); - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value( - "Evaluates expression o. 
If it is non-blank, binds its value to variable name v, evaluates expression eNonBlank and returns the result. " + - "Otherwise (if o evaluates to blank), evaluates expression eBlank and returns that result instead." - ); - writer.key("params"); writer.value("expression o, variable v, expression eNonBlank, expression eBlank"); - writer.key("returns"); writer.value("Depends on actual arguments"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/gel/controls/ForRange.java b/main/src/com/google/gridworks/gel/controls/ForRange.java deleted file mode 100644 index 106112f1f..000000000 --- a/main/src/com/google/gridworks/gel/controls/ForRange.java +++ /dev/null @@ -1,110 +0,0 @@ -package com.google.gridworks.gel.controls; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.gel.Control; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.ast.VariableExpr; - -public class ForRange implements Control { - public String checkArguments(Evaluable[] args) { - if (args.length != 5) { - return ControlFunctionRegistry.getControlName(this) + " expects 5 arguments"; - } else if (!(args[3] instanceof VariableExpr)) { - return ControlFunctionRegistry.getControlName(this) + - " expects third argument to be the element's variable name"; - } - return null; - } - - public Object call(Properties bindings, Evaluable[] args) { - Object fromO = args[0].evaluate(bindings); - Object toO = args[1].evaluate(bindings); - Object stepO = args[2].evaluate(bindings); - - if (ExpressionUtils.isError(fromO)) { - return fromO; - } else if (ExpressionUtils.isError(toO)) { - return toO; - } else if (ExpressionUtils.isError(stepO)) { - return stepO; - } else if (!(fromO instanceof 
Number) || !(toO instanceof Number) || !(stepO instanceof Number)) { - return new EvalError("First, second, and third arguments of forRange must all be numbers"); - } - - String indexName = ((VariableExpr) args[3]).getName(); - Object oldIndexValue = bindings.get(indexName); - - try { - List results = new ArrayList(); - - if (isIntegral((Number) fromO) && isIntegral((Number) stepO)) { - long from = ((Number) fromO).longValue(); - long step = ((Number) stepO).longValue(); - double to = ((Number) toO).doubleValue(); - - while (from < to) { - bindings.put(indexName, from); - - Object r = args[4].evaluate(bindings); - - results.add(r); - - from += step; - } - } else { - double from = ((Number) fromO).longValue(); - double step = ((Number) stepO).longValue(); - double to = ((Number) toO).doubleValue(); - - while (from < to) { - bindings.put(indexName, from); - - Object r = args[4].evaluate(bindings); - - results.add(r); - - from += step; - } - } - return results.toArray(); - } finally { - /* - * Restore the old values bound to the variables, if any. - */ - if (oldIndexValue != null) { - bindings.put(indexName, oldIndexValue); - } else { - bindings.remove(indexName); - } - } - } - - static private boolean isIntegral(Number o) { - if (o instanceof Integer || o instanceof Long) { - return true; - } else { - return (o.doubleValue() - o.longValue()) == 0; - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value( - "Iterates over the variable v starting at \"from\", incrementing by \"step\" each time while less than \"to\". At each iteration, evaluates expression e, and pushes the result onto the result array." 
- ); - writer.key("params"); writer.value("number from, number to, number step, variable v, expression e"); - writer.key("returns"); writer.value("array"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/gel/controls/If.java b/main/src/com/google/gridworks/gel/controls/If.java deleted file mode 100644 index 643b3e74d..000000000 --- a/main/src/com/google/gridworks/gel/controls/If.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.google.gridworks.gel.controls; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.gel.Control; -import com.google.gridworks.gel.ControlFunctionRegistry; - -public class If implements Control { - public String checkArguments(Evaluable[] args) { - if (args.length != 3) { - return ControlFunctionRegistry.getControlName(this) + " expects 3 arguments"; - } - return null; - } - - public Object call(Properties bindings, Evaluable[] args) { - Object o = args[0].evaluate(bindings); - if (ExpressionUtils.isError(o)) { - return o; // bubble the error up - } else if (ExpressionUtils.isTrue(o)) { - return args[1].evaluate(bindings); - } else { - return args[2].evaluate(bindings); - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value( - "Evaluates expression o. If it is true, evaluates expression eTrue and returns the result. " + - "Otherwise, evaluates expression eFalse and returns that result instead." 
- ); - writer.key("params"); writer.value("expression o, expression eTrue, expression eFalse"); - writer.key("returns"); writer.value("Depends on actual arguments"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/gel/controls/IsBlank.java b/main/src/com/google/gridworks/gel/controls/IsBlank.java deleted file mode 100644 index 3b3626159..000000000 --- a/main/src/com/google/gridworks/gel/controls/IsBlank.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.google.gridworks.gel.controls; - -import com.google.gridworks.expr.ExpressionUtils; - -public class IsBlank extends IsTest { - @Override - protected String getDescription() { - return "Returns whether o is null or an empty string"; - } - - @Override - protected boolean test(Object o) { - return !ExpressionUtils.isNonBlankData(o); - } -} diff --git a/main/src/com/google/gridworks/gel/controls/IsError.java b/main/src/com/google/gridworks/gel/controls/IsError.java deleted file mode 100644 index ba71705f6..000000000 --- a/main/src/com/google/gridworks/gel/controls/IsError.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.google.gridworks.gel.controls; - -import com.google.gridworks.expr.ExpressionUtils; - -public class IsError extends IsTest { - @Override - protected String getDescription() { - return "Returns whether o is an error"; - } - - @Override - protected boolean test(Object o) { - return ExpressionUtils.isError(o); - } -} diff --git a/main/src/com/google/gridworks/gel/controls/IsNonBlank.java b/main/src/com/google/gridworks/gel/controls/IsNonBlank.java deleted file mode 100644 index a2ec0734d..000000000 --- a/main/src/com/google/gridworks/gel/controls/IsNonBlank.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.google.gridworks.gel.controls; - -import com.google.gridworks.expr.ExpressionUtils; - -public class IsNonBlank extends IsTest { - @Override - protected String getDescription() { - return "Returns whether o is not null and not an empty string"; - } - - @Override - protected boolean 
test(Object o) { - return ExpressionUtils.isNonBlankData(o); - } -} diff --git a/main/src/com/google/gridworks/gel/controls/IsNotNull.java b/main/src/com/google/gridworks/gel/controls/IsNotNull.java deleted file mode 100644 index 35128543d..000000000 --- a/main/src/com/google/gridworks/gel/controls/IsNotNull.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.google.gridworks.gel.controls; - -public class IsNotNull extends IsTest { - @Override - protected String getDescription() { - return "Returns whether o is not null"; - } - - @Override - protected boolean test(Object o) { - return o != null; - } -} diff --git a/main/src/com/google/gridworks/gel/controls/IsNull.java b/main/src/com/google/gridworks/gel/controls/IsNull.java deleted file mode 100644 index db211cfa8..000000000 --- a/main/src/com/google/gridworks/gel/controls/IsNull.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.google.gridworks.gel.controls; - -public class IsNull extends IsTest { - @Override - protected String getDescription() { - return "Returns whether o is null"; - } - - @Override - protected boolean test(Object o) { - return o == null; - } -} diff --git a/main/src/com/google/gridworks/gel/controls/IsNumeric.java b/main/src/com/google/gridworks/gel/controls/IsNumeric.java deleted file mode 100644 index 3b7f28fcc..000000000 --- a/main/src/com/google/gridworks/gel/controls/IsNumeric.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.google.gridworks.gel.controls; - -import org.apache.commons.lang.StringUtils; - -public class IsNumeric extends IsTest { - @Override - protected String getDescription() { - return "Returns whether o can represent a number"; - } - - @Override - protected boolean test(Object o) { - if (o instanceof Number) return true; - - String s = (o instanceof String) ? 
(String) o : o.toString(); - - return StringUtils.isNumeric(s); - } -} diff --git a/main/src/com/google/gridworks/gel/controls/IsTest.java b/main/src/com/google/gridworks/gel/controls/IsTest.java deleted file mode 100644 index 2725459d8..000000000 --- a/main/src/com/google/gridworks/gel/controls/IsTest.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.google.gridworks.gel.controls; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.gel.Control; -import com.google.gridworks.gel.ControlFunctionRegistry; - -abstract class IsTest implements Control { - public String checkArguments(Evaluable[] args) { - if (args.length != 1) { - return ControlFunctionRegistry.getControlName(this) + " expects one argument"; - } - return null; - } - - public Object call(Properties bindings, Evaluable[] args) { - Object o = args[0].evaluate(bindings); - - return test(o); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value(getDescription()); - writer.key("params"); writer.value("expression o"); - writer.key("returns"); writer.value("boolean"); - writer.endObject(); - } - - abstract protected boolean test(Object v); - - abstract protected String getDescription(); -} diff --git a/main/src/com/google/gridworks/gel/controls/With.java b/main/src/com/google/gridworks/gel/controls/With.java deleted file mode 100644 index f7d109cce..000000000 --- a/main/src/com/google/gridworks/gel/controls/With.java +++ /dev/null @@ -1,60 +0,0 @@ -package com.google.gridworks.gel.controls; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.gel.Control; -import com.google.gridworks.gel.ControlFunctionRegistry; -import com.google.gridworks.gel.ast.VariableExpr; - -public class With 
implements Control { - public String checkArguments(Evaluable[] args) { - if (args.length != 3) { - return ControlFunctionRegistry.getControlName(this) + " expects 3 arguments"; - } else if (!(args[1] instanceof VariableExpr)) { - return ControlFunctionRegistry.getControlName(this) + - " expects second argument to be a variable name"; - } - return null; - } - - public Object call(Properties bindings, Evaluable[] args) { - Object o = args[0].evaluate(bindings); - String name = ((VariableExpr) args[1]).getName(); - - Object oldValue = bindings.get(name); - try { - if (o != null) { - bindings.put(name, o); - } else { - bindings.remove(name); - } - - return args[2].evaluate(bindings); - } finally { - /* - * Restore the old value bound to the variable, if any. - */ - if (oldValue != null) { - bindings.put(name, oldValue); - } else { - bindings.remove(name); - } - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value( - "Evaluates expression o and binds its value to variable name v. Then evaluates expression e and returns that result" - ); - writer.key("params"); writer.value("expression o, variable v, expression e"); - writer.key("returns"); writer.value("Depends on actual arguments"); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/history/Change.java b/main/src/com/google/gridworks/history/Change.java deleted file mode 100644 index 857ed31fc..000000000 --- a/main/src/com/google/gridworks/history/Change.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.google.gridworks.history; - -import java.io.IOException; -import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.model.Project; - -/** - * Interface for a concrete change to a project's data. A change should consist - * of new values already computed. When apply() is called, the change should not - * spend any more time computing anything. 
It should simply save existing values - * and swap in new values. Similarly, when revert() is called, the change - * should only swap old values back in. - */ -public interface Change { - public void apply(Project project); - public void revert(Project project); - - public void save(Writer writer, Properties options) throws IOException; -} diff --git a/main/src/com/google/gridworks/history/ChangeSequence.java b/main/src/com/google/gridworks/history/ChangeSequence.java deleted file mode 100644 index 4e79e2378..000000000 --- a/main/src/com/google/gridworks/history/ChangeSequence.java +++ /dev/null @@ -1,65 +0,0 @@ -package com.google.gridworks.history; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.model.Project; -import com.google.gridworks.util.Pool; - -public class ChangeSequence implements Change { - final protected Change[] _changes; - - public ChangeSequence(Change[] changes) { - _changes = changes; - } - - public void apply(Project project) { - synchronized (project) { - for (int i = 0; i < _changes.length; i++) { - _changes[i].apply(project); - } - } - } - - public void revert(Project project) { - synchronized (project) { - for (int i = _changes.length - 1; i >= 0 ; i--) { - _changes[i].apply(project); - } - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("count="); writer.write(Integer.toString(_changes.length)); writer.write('\n'); - for (int i = 0; i < _changes.length; i++) { - Change change = _changes[i]; - - writer.write(change.getClass().getName()); writer.write('\n'); - - change.save(writer, options); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - String line = reader.readLine(); - if (line == null) line = ""; - int equal = line.indexOf('='); - - assert "count".equals(line.substring(0, equal)); - - int 
count = Integer.parseInt(line.substring(equal + 1)); - Change[] changes = new Change[count]; - - for (int i = 0; i < count; i++) { - changes[i] = History.readOneChange(reader, pool); - } - - line = reader.readLine(); - assert "/ec/".equals(line); - - return new ChangeSequence(changes); - } -} diff --git a/main/src/com/google/gridworks/history/History.java b/main/src/com/google/gridworks/history/History.java deleted file mode 100644 index 5991ccce8..000000000 --- a/main/src/com/google/gridworks/history/History.java +++ /dev/null @@ -1,278 +0,0 @@ -package com.google.gridworks.history; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.LineNumberReader; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.GridworksServlet; -import com.google.gridworks.Jsonizable; -import com.google.gridworks.ProjectManager; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.Pool; - -/** - * Track done and undone changes. Done changes can be undone; undone changes can be redone. - * Each change is actually not tracked directly but through a history entry. The history - * entry stores only the metadata, while the change object stores the actual data. Thus - * the history entries are much smaller and can be kept in memory, while the change objects - * are only loaded into memory on demand. 
- */ -public class History implements Jsonizable { - static public Change readOneChange(InputStream in, Pool pool) throws Exception { - LineNumberReader reader = new LineNumberReader(new InputStreamReader(in)); - try { - return readOneChange(reader, pool); - } finally { - reader.close(); - } - } - - static public Change readOneChange(LineNumberReader reader, Pool pool) throws Exception { - /* String version = */ reader.readLine(); - - String className = reader.readLine(); - Class klass = getChangeClass(className); - - Method load = klass.getMethod("load", LineNumberReader.class, Pool.class); - - return (Change) load.invoke(null, reader, pool); - } - - static public void writeOneChange(OutputStream out, Change change, Pool pool) throws IOException { - Writer writer = new OutputStreamWriter(out); - try { - History.writeOneChange(writer, change, pool); - } finally { - writer.flush(); - } - } - - static public void writeOneChange(Writer writer, Change change, Pool pool) throws IOException { - Properties options = new Properties(); - options.setProperty("mode", "save"); - options.put("pool", pool); - - writeOneChange(writer, change, options); - } - - static public void writeOneChange(Writer writer, Change change, Properties options) throws IOException { - writer.write(GridworksServlet.getVersion()); writer.write('\n'); - writer.write(change.getClass().getName()); writer.write('\n'); - - change.save(writer, options); - } - - @SuppressWarnings("unchecked") - static public Class getChangeClass(String className) throws ClassNotFoundException { - return (Class) GridworksServlet.getClass(className); - } - - protected long _projectID; - protected List _pastEntries; // done changes, can be undone - protected List _futureEntries; // undone changes, can be redone - - public History(Project project) { - _projectID = project.id; - _pastEntries = new ArrayList(); - _futureEntries = new ArrayList(); - } - - /** - * Adds a HistoryEntry to the list of past histories - * Adding a new 
entry clears all currently held future histories - * @param entry - */ - synchronized public void addEntry(HistoryEntry entry) { - entry.apply(ProjectManager.singleton.getProject(_projectID)); - _pastEntries.add(entry); - - setModified(); - - // Any new change will clear all future entries. - List futureEntries = _futureEntries; - _futureEntries = new ArrayList(); - - for (HistoryEntry entry2 : futureEntries) { - try { - // remove residual data on disk - entry2.delete(); - } catch (Exception e) { - e.printStackTrace(); - } - } - } - - protected void setModified() { - ProjectManager.singleton.getProjectMetadata(_projectID).updateModified(); - } - - synchronized public List getLastPastEntries(int count) { - if (count <= 0) { - return new LinkedList(_pastEntries); - } else { - return _pastEntries.subList(Math.max(_pastEntries.size() - count, 0), _pastEntries.size()); - } - } - - synchronized public void undoRedo(long lastDoneEntryID) { - if (lastDoneEntryID == 0) { - // undo all the way back to the start of the project - undo(_pastEntries.size()); - } else { - for (int i = 0; i < _pastEntries.size(); i++) { - if (_pastEntries.get(i).id == lastDoneEntryID) { - undo(_pastEntries.size() - i - 1); - return; - } - } - - for (int i = 0; i < _futureEntries.size(); i++) { - if (_futureEntries.get(i).id == lastDoneEntryID) { - redo(i + 1); - return; - } - } - } - } - - synchronized public long getPrecedingEntryID(long entryID) { - if (entryID == 0) { - return -1; - } else { - for (int i = 0; i < _pastEntries.size(); i++) { - if (_pastEntries.get(i).id == entryID) { - return i == 0 ? 
0 : _pastEntries.get(i - 1).id; - } - } - - for (int i = 0; i < _futureEntries.size(); i++) { - if (_futureEntries.get(i).id == entryID) { - if (i > 0) { - return _futureEntries.get(i - 1).id; - } else if (_pastEntries.size() > 0) { - return _pastEntries.get(_pastEntries.size() - 1).id; - } else { - return 0; - } - } - } - } - return -1; - } - - protected HistoryEntry getEntry(long entryID) { - for (int i = 0; i < _pastEntries.size(); i++) { - if (_pastEntries.get(i).id == entryID) { - return _pastEntries.get(i); - } - } - - for (int i = 0; i < _futureEntries.size(); i++) { - if (_futureEntries.get(i).id == entryID) { - return _futureEntries.get(i); - } - } - return null; - } - - protected void undo(int times) { - Project project = ProjectManager.singleton.getProject(_projectID); - - while (times > 0 && _pastEntries.size() > 0) { - HistoryEntry entry = _pastEntries.get(_pastEntries.size() - 1); - - entry.revert(project); - - setModified(); - times--; - - _pastEntries.remove(_pastEntries.size() - 1); - _futureEntries.add(0, entry); - } - } - - protected void redo(int times) { - Project project = ProjectManager.singleton.getProject(_projectID); - - while (times > 0 && _futureEntries.size() > 0) { - HistoryEntry entry = _futureEntries.get(0); - - entry.apply(project); - - setModified(); - times--; - - _pastEntries.add(entry); - _futureEntries.remove(0); - } - } - - synchronized public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - - writer.key("past"); writer.array(); - for (HistoryEntry entry : _pastEntries) { - entry.write(writer, options); - } - writer.endArray(); - - writer.key("future"); writer.array(); - for (HistoryEntry entry : _futureEntries) { - entry.write(writer, options); - } - writer.endArray(); - - writer.endObject(); - } - - synchronized public void save(Writer writer, Properties options) throws IOException { - writer.write("pastEntryCount="); writer.write(Integer.toString(_pastEntries.size())); 
writer.write('\n'); - for (HistoryEntry entry : _pastEntries) { - entry.save(writer, options); writer.write('\n'); - } - - writer.write("futureEntryCount="); writer.write(Integer.toString(_futureEntries.size())); writer.write('\n'); - for (HistoryEntry entry : _futureEntries) { - entry.save(writer, options); writer.write('\n'); - } - - writer.write("/e/\n"); - } - - synchronized public void load(Project project, LineNumberReader reader) throws Exception { - String line; - while ((line = reader.readLine()) != null && !"/e/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if ("pastEntryCount".equals(field)) { - int count = Integer.parseInt(value); - - for (int i = 0; i < count; i++) { - _pastEntries.add(HistoryEntry.load(project, reader.readLine())); - } - } else if ("futureEntryCount".equals(field)) { - int count = Integer.parseInt(value); - - for (int i = 0; i < count; i++) { - _futureEntries.add(HistoryEntry.load(project, reader.readLine())); - } - } - } - } -} diff --git a/main/src/com/google/gridworks/history/HistoryEntry.java b/main/src/com/google/gridworks/history/HistoryEntry.java deleted file mode 100644 index 8498c788d..000000000 --- a/main/src/com/google/gridworks/history/HistoryEntry.java +++ /dev/null @@ -1,139 +0,0 @@ -package com.google.gridworks.history; - -import java.io.Writer; -import java.util.Date; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.ProjectManager; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.operations.OperationRegistry; -import com.google.gridworks.util.ParsingUtilities; - -/** - * This is the metadata of a Change. 
It's small, so we can load it in order to - * obtain information about a change without actually loading the change. - */ -public class HistoryEntry implements Jsonizable { - final public long id; - final public long projectID; - final public String description; - final public Date time; - - // the manager (deals with IO systems or databases etc.) - final public HistoryEntryManager _manager; - - // the abstract operation, if any, that results in the change - final public AbstractOperation operation; - - // the actual change, loaded on demand - private transient Change _change; - - private final static String OPERATION = "operation"; - - public void setChange(Change _change) { - this._change = _change; - } - - public Change getChange() { - return _change; - } - - static public long allocateID() { - return Math.round(Math.random() * 1000000) + System.currentTimeMillis(); - } - - public HistoryEntry(long id, Project project, String description, AbstractOperation operation, Change change) { - this.id = id; - this.projectID = project.id; - this.description = description; - this.operation = operation; - this.time = new Date(); - - this._manager = ProjectManager.singleton.getHistoryEntryManager(); - setChange(change); - } - - protected HistoryEntry(long id, long projectID, String description, AbstractOperation operation, Date time) { - this.id = id; - this.projectID = projectID; - this.description = description; - this.operation = operation; - this.time = time; - this._manager = ProjectManager.singleton.getHistoryEntryManager(); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("id"); writer.value(id); - writer.key("description"); writer.value(description); - writer.key("time"); writer.value(ParsingUtilities.dateToString(time)); - if ("save".equals(options.getProperty("mode")) && operation != null) { - writer.key(OPERATION); operation.write(writer, options); - } - writer.endObject(); - } - - public 
void save(Writer writer, Properties options){ - _manager.save(this, writer, options); - } - - public void apply(Project project) { - if (getChange() == null) { - ProjectManager.singleton.getHistoryEntryManager().loadChange(this); - } - - synchronized (project) { - getChange().apply(project); - - // When a change is applied, it can hang on to old data (in order to be able - // to revert later). Hence, we need to save the change out. - - try { - _manager.saveChange(this); - } catch (Exception e) { - e.printStackTrace(); - - getChange().revert(project); - - throw new RuntimeException("Failed to apply change", e); - } - } - } - - public void revert(Project project) { - if (getChange() == null) { - _manager.loadChange(this); - } - getChange().revert(project); - } - - static public HistoryEntry load(Project project, String s) throws Exception { - JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(s); - - AbstractOperation operation = null; - if (obj.has(OPERATION) && !obj.isNull(OPERATION)) { - operation = OperationRegistry.reconstruct(project, obj.getJSONObject(OPERATION)); - } - - return new HistoryEntry( - obj.getLong("id"), - project.id, - obj.getString("description"), - operation, - ParsingUtilities.stringToDate(obj.getString("time")) - ); - } - - public void delete(){ - _manager.delete(this); - } - -} diff --git a/main/src/com/google/gridworks/history/HistoryEntryManager.java b/main/src/com/google/gridworks/history/HistoryEntryManager.java deleted file mode 100644 index 759da5146..000000000 --- a/main/src/com/google/gridworks/history/HistoryEntryManager.java +++ /dev/null @@ -1,12 +0,0 @@ -package com.google.gridworks.history; - -import java.io.Writer; -import java.util.Properties; - - -public interface HistoryEntryManager { - public void loadChange(HistoryEntry historyEntry); - public void saveChange(HistoryEntry historyEntry) throws Exception; - public void save(HistoryEntry historyEntry, Writer writer, Properties options); - public void 
delete(HistoryEntry historyEntry); -} diff --git a/main/src/com/google/gridworks/history/HistoryProcess.java b/main/src/com/google/gridworks/history/HistoryProcess.java deleted file mode 100644 index cda73172a..000000000 --- a/main/src/com/google/gridworks/history/HistoryProcess.java +++ /dev/null @@ -1,73 +0,0 @@ -package com.google.gridworks.history; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.model.Project; -import com.google.gridworks.process.Process; -import com.google.gridworks.process.ProcessManager; - -/** - * The process for undoing or redoing. This involves calling apply() and revert() - * on changes. - */ -public class HistoryProcess extends Process { - final protected Project _project; - final protected long _lastDoneID; - final protected String _description; - - protected boolean _done = false; - - private final static String WARN = "Not a long-running process"; - - public HistoryProcess(Project project, long lastDoneID) { - _project = project; - _lastDoneID = lastDoneID; - - if (_lastDoneID == 0) { - _description = "Undo all"; - } else { - HistoryEntry entry = _project.history.getEntry(_lastDoneID); - _description = "Undo/redo until after " + entry.description; - } - } - - public void cancel() { - throw new RuntimeException(WARN); - } - - public boolean isImmediate() { - return true; - } - - public HistoryEntry performImmediate() { - _project.history.undoRedo(_lastDoneID); - _done = true; - - return null; - } - - public void startPerforming(ProcessManager manager) { - throw new RuntimeException(WARN); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("description"); writer.value(_description); - writer.key("immediate"); writer.value(true); - writer.key("status"); writer.value(_done ? 
"done" : "pending"); - writer.endObject(); - } - - public boolean isDone() { - throw new RuntimeException(WARN); - } - - public boolean isRunning() { - throw new RuntimeException(WARN); - } -} diff --git a/main/src/com/google/gridworks/importers/ExcelImporter.java b/main/src/com/google/gridworks/importers/ExcelImporter.java deleted file mode 100644 index ecd07c526..000000000 --- a/main/src/com/google/gridworks/importers/ExcelImporter.java +++ /dev/null @@ -1,304 +0,0 @@ -package com.google.gridworks.importers; - -import java.io.IOException; -import java.io.InputStream; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; - -import org.apache.poi.common.usermodel.Hyperlink; -import org.apache.poi.hssf.usermodel.HSSFDateUtil; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.Recon.Judgment; - -public class ExcelImporter implements StreamImporter { - protected boolean _xmlBased; - - @Override - public void read(InputStream inputStream, Project project, ProjectMetadata metadata, Properties options) throws ImportException { - int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1); - int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1); - int limit = ImporterUtilities.getIntegerOption("limit", options, -1); - int skip = 
ImporterUtilities.getIntegerOption("skip", options, 0); - - Workbook wb = null; - try { - wb = _xmlBased ? - new XSSFWorkbook(inputStream) : - new HSSFWorkbook(new POIFSFileSystem(inputStream)); - } catch (IOException e) { - throw new ImportException( - "Attempted to parse file as Excel file but failed. " + - "Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.", - e - ); - } - - Sheet sheet = wb.getSheetAt(0); - - int firstRow = sheet.getFirstRowNum(); - int lastRow = sheet.getLastRowNum(); - - List columnNames = new ArrayList(); - Set columnNameSet = new HashSet(); - Map columnRootNameToIndex = new HashMap(); - - int rowsWithData = 0; - Map reconMap = new HashMap(); - - for (int r = firstRow; r <= lastRow; r++) { - org.apache.poi.ss.usermodel.Row row = sheet.getRow(r); - if (row == null) { - continue; - } else if (ignoreLines > 0) { - ignoreLines--; - continue; - } - - short firstCell = row.getFirstCellNum(); - short lastCell = row.getLastCellNum(); - if (firstCell < 0 || firstCell > lastCell) { - continue; - } - - /* - * Still processing header lines - */ - if (headerLines > 0) { - headerLines--; - - for (int c = firstCell; c <= lastCell; c++) { - org.apache.poi.ss.usermodel.Cell cell = row.getCell(c); - if (cell != null) { - Serializable value = extractCell(cell); - String text = value != null ? value.toString() : null; - if (text != null && text.length() > 0) { - while (columnNames.size() < c + 1) { - columnNames.add(null); - } - - String existingName = columnNames.get(c); - String name = (existingName == null) ? 
text : (existingName + " " + text); - - columnNames.set(c, name); - } - } - } - - if (headerLines == 0) { - for (int i = 0; i < columnNames.size(); i++) { - String rootName = columnNames.get(i); - if (rootName == null) { - continue; - } - setUnduplicatedColumnName(rootName, columnNames, i, columnNameSet, columnRootNameToIndex); - } - } - - /* - * Processing data rows - */ - } else { - Row newRow = new Row(columnNames.size()); - boolean hasData = false; - - for (int c = firstCell; c <= lastCell; c++) { - org.apache.poi.ss.usermodel.Cell cell = row.getCell(c); - if (cell == null) { - continue; - } - - Cell ourCell = extractCell(cell, reconMap); - if (ourCell != null) { - while (columnNames.size() < c + 1) { - columnNames.add(null); - } - if (columnNames.get(c) == null) { - setUnduplicatedColumnName("Column", columnNames, c, columnNameSet, columnRootNameToIndex); - } - - newRow.setCell(c, ourCell); - hasData = true; - } - } - - if (hasData) { - rowsWithData++; - - if (skip <= 0 || rowsWithData > skip) { - project.rows.add(newRow); - project.columnModel.setMaxCellIndex(newRow.cells.size()); - - if (limit > 0 && project.rows.size() >= limit) { - break; - } - } - } - } - } - - /* - * Create columns - */ - for (int c = 0; c < columnNames.size(); c++) { - String name = columnNames.get(c); - if (name != null) { - Column column = new Column(c, name); - project.columnModel.columns.add(column); - } - } - } - - protected void setUnduplicatedColumnName( - String rootName, List columnNames, int index, Set columnNameSet, Map columnRootNameToIndex) { - if (columnNameSet.contains(rootName)) { - int startIndex = columnRootNameToIndex.containsKey(rootName) ? 
columnRootNameToIndex.get(rootName) : 2; - while (true) { - String name = rootName + " " + startIndex; - if (columnNameSet.contains(name)) { - startIndex++; - } else { - columnNames.set(index, name); - columnNameSet.add(name); - break; - } - } - - columnRootNameToIndex.put(rootName, startIndex + 1); - } else { - columnNames.set(index, rootName); - columnNameSet.add(rootName); - } - } - - protected Serializable extractCell(org.apache.poi.ss.usermodel.Cell cell) { - int cellType = cell.getCellType(); - if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_ERROR || - cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BLANK) { - return null; - } - if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_FORMULA) { - cellType = cell.getCachedFormulaResultType(); - } - - Serializable value = null; - if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BOOLEAN) { - value = cell.getBooleanCellValue(); - } else if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_NUMERIC) { - double d = cell.getNumericCellValue(); - - if (HSSFDateUtil.isCellDateFormatted(cell)) { - value = HSSFDateUtil.getJavaDate(d); - } else { - value = d; - } - } else { - String text = cell.getStringCellValue().trim(); - if (text.length() > 0) { - value = text; - } - } - - return value; - } - - protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map reconMap) { - Serializable value = extractCell(cell); - - if (value != null) { - Recon recon = null; - - Hyperlink hyperlink = cell.getHyperlink(); - if (hyperlink != null) { - String url = hyperlink.getAddress(); - - if (url.startsWith("http://") || - url.startsWith("https://")) { - - final String sig = "freebase.com/view"; - - int i = url.indexOf(sig); - if (i > 0) { - String id = url.substring(i + sig.length()); - - int q = id.indexOf('?'); - if (q > 0) { - id = id.substring(0, q); - } - int h = id.indexOf('#'); - if (h > 0) { - id = id.substring(0, h); - } - - if (reconMap.containsKey(id)) { - recon = reconMap.get(id); 
- recon.judgmentBatchSize++; - } else { - recon = new Recon(0, null, null); - recon.service = "import"; - recon.match = new ReconCandidate(id, value.toString(), new String[0], 100); - recon.matchRank = 0; - recon.judgment = Judgment.Matched; - recon.judgmentAction = "auto"; - recon.judgmentBatchSize = 1; - recon.addCandidate(recon.match); - - reconMap.put(id, recon); - } - - } - } - } - - return new Cell(value, recon); - } else { - return null; - } - } - - @Override - public boolean canImportData(String contentType, String fileName) { - if (contentType != null) { - contentType = contentType.toLowerCase().trim(); - if ("application/msexcel".equals(contentType) || - "application/x-msexcel".equals(contentType) || - "application/x-ms-excel".equals(contentType) || - "application/vnd.ms-excel".equals(contentType) || - "application/x-excel".equals(contentType) || - "application/xls".equals(contentType)) { - this._xmlBased = false; - return true; - } else if("application/x-xls".equals(contentType)) { - this._xmlBased = true; - return true; - } - } else if (fileName != null) { - fileName = fileName.toLowerCase(); - if (fileName.endsWith(".xls")) { - this._xmlBased = false; - return true; - } else if (fileName.endsWith(".xlsx")) { - this._xmlBased = true; - return true; - } - } - return false; - } -} diff --git a/main/src/com/google/gridworks/importers/ImportException.java b/main/src/com/google/gridworks/importers/ImportException.java deleted file mode 100644 index 139e0e2cc..000000000 --- a/main/src/com/google/gridworks/importers/ImportException.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.google.gridworks.importers; - -/** - * Exception thrown by importers. Typically contains a nested exception - * indicating the underlying cause of the problem. 
- */ -public class ImportException extends Exception { - - private static final long serialVersionUID = 7077314805989174181L; - - public ImportException(String message, Throwable cause) { - super(message, cause); - } - -} diff --git a/main/src/com/google/gridworks/importers/Importer.java b/main/src/com/google/gridworks/importers/Importer.java deleted file mode 100644 index 15e235a16..000000000 --- a/main/src/com/google/gridworks/importers/Importer.java +++ /dev/null @@ -1,14 +0,0 @@ -package com.google.gridworks.importers; - - -public interface Importer { - - /** - * Determine whether importer can handle given contentType and filename. - * - * @param contentType - * @param fileName - * @return true if the importer can handle this - */ - public boolean canImportData(String contentType, String fileName); -} diff --git a/main/src/com/google/gridworks/importers/ImporterRegistry.java b/main/src/com/google/gridworks/importers/ImporterRegistry.java deleted file mode 100644 index d84afc0dc..000000000 --- a/main/src/com/google/gridworks/importers/ImporterRegistry.java +++ /dev/null @@ -1,103 +0,0 @@ -package com.google.gridworks.importers; - -import java.net.URL; -import java.util.HashMap; -import java.util.Map; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -abstract public class ImporterRegistry { - final static Logger logger = LoggerFactory.getLogger("importer-registry"); - - static final private Map importers = new HashMap(); - - private static final String[][] importerNames = { - {"ExcelImporter", "com.google.gridworks.importers.ExcelImporter"}, - {"XmlImporter", "com.google.gridworks.importers.XmlImporter"}, - {"RdfTripleImporter", "com.google.gridworks.importers.RdfTripleImporter"}, - {"MarcImporter", "com.google.gridworks.importers.MarcImporter"}, - {"TsvCsvImporter", "com.google.gridworks.importers.TsvCsvImporter"} - }; - - static { - registerImporters(importerNames); - } - - static public boolean registerImporters(String[][] importers) { - 
boolean status = true; - for (String[] importer : importerNames) { - String importerName = importer[0]; - String className = importer[1]; - logger.debug("Loading command " + importerName + " class: " + className); - Importer cmd; - try { - // TODO: May need to use the servlet container's class loader here - cmd = (Importer) Class.forName(className).newInstance(); - } catch (InstantiationException e) { - logger.error("Failed to load importer class " + className, e); - status = false; - continue; - } catch (IllegalAccessException e) { - logger.error("Failed to load importer class " + className, e); - status = false; - continue; - } catch (ClassNotFoundException e) { - logger.error("Failed to load importer class " + className, e); - status = false; - continue; - } - status |= registerImporter(importerName, cmd); - } - return status; - } - - /** - * Register a single importer. - * - * @param name importer verb for importer - * @param importerObject object implementing the importer - * - * @return true if importer was loaded and registered successfully - */ - static public boolean registerImporter(String name, Importer importerObject) { - if (importers.containsKey(name)) { - return false; - } - importers.put(name, importerObject); - return true; - } - - // Currently only for test purposes - static protected boolean unregisterImporter(String verb) { - return importers.remove(verb) != null; - } - - static public Importer guessImporter(String contentType, String fileName, boolean provideDefault) { - for (Importer i : importers.values()){ - if(i.canImportData(contentType, fileName)){ - return i; - } - } - if (provideDefault) { - return new TsvCsvImporter(); // default - } else { - return null; - } - } - - static public Importer guessImporter(String contentType, String filename) { - return guessImporter(contentType, filename, true); - } - - static public Importer guessUrlImporter(URL url) { - for (Importer importer : importers.values()){ - if (importer instanceof UrlImporter 
- && ((UrlImporter) importer).canImportData(url)) { - return importer; - } - } - return null; - } -} diff --git a/main/src/com/google/gridworks/importers/ImporterUtilities.java b/main/src/com/google/gridworks/importers/ImporterUtilities.java deleted file mode 100644 index 775761536..000000000 --- a/main/src/com/google/gridworks/importers/ImporterUtilities.java +++ /dev/null @@ -1,111 +0,0 @@ -package com.google.gridworks.importers; - -import java.io.Serializable; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class ImporterUtilities { - - static public Serializable parseCellValue(String text) { - if (text.length() > 0) { - if (text.length() > 1 && text.startsWith("\"") && text.endsWith("\"")) { - return text.substring(1, text.length() - 1); - } - - try { - return Long.parseLong(text); - } catch (NumberFormatException e) { - } - - try { - double d = Double.parseDouble(text); - if (!Double.isInfinite(d) && !Double.isNaN(d)) { - return d; - } - } catch (NumberFormatException e) { - } - text = text.trim(); - } - return text; - } - - static public int getIntegerOption(String name, Properties options, int def) { - int value = def; - if (options.containsKey(name)) { - String s = options.getProperty(name); - try { - value = Integer.parseInt(s); - } catch (Exception e) { - } - } - return value; - } - - static public boolean getBooleanOption(String name, Properties options, boolean def) { - boolean value = def; - if (options.containsKey(name)) { - String s = options.getProperty(name); - try { - value = s.equalsIgnoreCase("on") || s.equals("1") || Boolean.parseBoolean(s); - } catch (Exception e) { - } - } - return value; - } - - static public void appendColumnName(List columnNames, int index, String name) { - name = name.trim(); - - while (columnNames.size() <= index) { - 
columnNames.add(""); - } - - if (!name.isEmpty()) { - String oldName = columnNames.get(index); - if (!oldName.isEmpty()) { - name = oldName + " " + name; - } - - columnNames.set(index, name); - } - } - - static public void ensureColumnsInRowExist(List columnNames, Row row) { - int count = row.cells.size(); - while (count > columnNames.size()) { - columnNames.add(""); - } - } - - static public void setupColumns(Project project, List columnNames) { - Map nameToIndex = new HashMap(); - for (int c = 0; c < columnNames.size(); c++) { - String cell = columnNames.get(c).trim(); - if (cell.isEmpty()) { - cell = "Column"; - } else if (cell.startsWith("\"") && cell.endsWith("\"")) { - cell = cell.substring(1, cell.length() - 1).trim(); //FIXME is trimming quotation marks appropriate? - } - - if (nameToIndex.containsKey(cell)) { - int index = nameToIndex.get(cell); - nameToIndex.put(cell, index + 1); - - cell = cell.contains(" ") ? (cell + " " + index) : (cell + index); - } else { - nameToIndex.put(cell, 2); - } - - Column column = new Column(c, cell); - - project.columnModel.columns.add(column); - } - } - -} diff --git a/main/src/com/google/gridworks/importers/MarcImporter.java b/main/src/com/google/gridworks/importers/MarcImporter.java deleted file mode 100644 index 4abe94c5c..000000000 --- a/main/src/com/google/gridworks/importers/MarcImporter.java +++ /dev/null @@ -1,108 +0,0 @@ -package com.google.gridworks.importers; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Properties; - -import org.marc4j.MarcPermissiveStreamReader; -import org.marc4j.MarcWriter; -import org.marc4j.MarcXmlWriter; -import org.marc4j.marc.Record; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.model.Project; - -public class MarcImporter implements StreamImporter { - - @Override - 
public void read( - InputStream inputStream, - Project project, - ProjectMetadata metadata, Properties options - ) throws ImportException { - int limit = ImporterUtilities.getIntegerOption("limit",options,-1); - int skip = ImporterUtilities.getIntegerOption("skip",options,0); - - File tempFile; - try { - tempFile = File.createTempFile("gridworks-import-", ".marc.xml"); - } catch (IOException e) { - throw new ImportException("Unexpected error creating temp file",e); - } - try { - OutputStream os = new FileOutputStream(tempFile); - try { - MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader( - inputStream, - true, - true - ); - MarcWriter writer = new MarcXmlWriter(os, true); - - int count = 0; - while (reader.hasNext()) { - Record record = reader.next(); - if (skip <= 0) { - if (limit == -1 || count < limit) { - writer.write(record); - count++; - } else { - break; - } - } else { - skip--; - } - } - writer.close(); - } finally { - try { - os.close(); - } catch (IOException e) { - // Just ignore - not much we can do anyway - } - } - - InputStream is = new FileInputStream(tempFile); - try { - new XmlImporter().read(is, project, metadata, options); - } finally { - try { - is.close(); - } catch (IOException e) { - // Just ignore - not much we can do anyway - } - } - } catch (FileNotFoundException e) { - throw new ImportException("Input file not found", e); - } finally { - tempFile.delete(); - } - } - - @Override - public boolean canImportData(String contentType, String fileName) { - if (contentType != null) { - contentType = contentType.toLowerCase().trim(); - - if ("application/marc".equals(contentType)) { - return true; - } - } else if (fileName != null) { - fileName = fileName.toLowerCase(); - if ( - fileName.endsWith(".mrc") || - fileName.endsWith(".marc") || - fileName.contains(".mrc.") || - fileName.contains(".marc.") - ) { - return true; - } - } - return false; - } -} diff --git a/main/src/com/google/gridworks/importers/RdfTripleImporter.java 
b/main/src/com/google/gridworks/importers/RdfTripleImporter.java deleted file mode 100644 index a0f9219a4..000000000 --- a/main/src/com/google/gridworks/importers/RdfTripleImporter.java +++ /dev/null @@ -1,142 +0,0 @@ -package com.google.gridworks.importers; - -import java.io.IOException; -import java.io.Reader; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Map.Entry; - -import org.jrdf.JRDFFactory; -import org.jrdf.SortedMemoryJRDFFactory; -import org.jrdf.collection.MemMapFactory; -import org.jrdf.graph.Graph; -import org.jrdf.graph.Triple; -import org.jrdf.parser.ParseException; -import org.jrdf.parser.StatementHandlerException; -import org.jrdf.parser.line.GraphLineParser; -import org.jrdf.parser.line.LineHandler; -import org.jrdf.parser.ntriples.NTriplesParserFactory; -import org.jrdf.util.ClosableIterable; -import static org.jrdf.graph.AnyObjectNode.ANY_OBJECT_NODE; -import static org.jrdf.graph.AnyPredicateNode.ANY_PREDICATE_NODE; -import static org.jrdf.graph.AnySubjectNode.ANY_SUBJECT_NODE; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.ModelException; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class RdfTripleImporter implements ReaderImporter{ - private JRDFFactory _jrdfFactory; - private NTriplesParserFactory _nTriplesParserFactory; - private MemMapFactory _newMapFactory; - - public RdfTripleImporter(){ - _jrdfFactory = SortedMemoryJRDFFactory.getFactory(); - _nTriplesParserFactory = new NTriplesParserFactory(); - _newMapFactory = new MemMapFactory(); - } - - @Override - public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) throws ImportException { - String baseUrl = 
options.getProperty("base-url"); - - Graph graph = _jrdfFactory.getNewGraph(); - LineHandler lineHandler = _nTriplesParserFactory.createParser(graph, _newMapFactory); - GraphLineParser parser = new GraphLineParser(graph, lineHandler); - try { - parser.parse(reader, baseUrl); // fills JRDF graph - } catch (IOException e) { - throw new ImportException("i/o error while parsing RDF",e); - } catch (ParseException e) { - throw new ImportException("error parsing RDF",e); - } catch (StatementHandlerException e) { - throw new ImportException("error parsing RDF",e); - } - - Map> subjectToRows = new HashMap>(); - - Column subjectColumn = new Column(0, "subject"); - project.columnModel.columns.add(0, subjectColumn); - project.columnModel.setKeyColumnIndex(0); - - ClosableIterable triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE); - try { - for (Triple triple : triples) { - String subject = triple.getSubject().toString(); - String predicate = triple.getPredicate().toString(); - String object = triple.getObject().toString(); - - Column column = project.columnModel.getColumnByName(predicate); - if (column == null) { - column = new Column(project.columnModel.allocateNewCellIndex(), predicate); - try { - project.columnModel.addColumn(-1, column, true); - } catch (ModelException e) { - // ignore - } - } - - int cellIndex = column.getCellIndex(); - if (subjectToRows.containsKey(subject)) { - List rows = subjectToRows.get(subject); - for (Row row : rows) { - if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) { - row.setCell(cellIndex, new Cell(object, null)); - object = null; - break; - } - } - - if (object != null) { - Row row = new Row(project.columnModel.getMaxCellIndex() + 1); - rows.add(row); - - row.setCell(cellIndex, new Cell(object, null)); - } - } else { - List rows = new ArrayList(); - subjectToRows.put(subject, rows); - - Row row = new Row(project.columnModel.getMaxCellIndex() + 1); - rows.add(row); - - 
row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null)); - row.setCell(cellIndex, new Cell(object, null)); - } - } - - for (Entry> entry : subjectToRows.entrySet()) { - project.rows.addAll(entry.getValue()); - } - } finally { - triples.iterator().close(); - } - } - - - @Override - public boolean canImportData(String contentType, String fileName) { - if (contentType != null) { - contentType = contentType.toLowerCase().trim(); - - if("application/rdf+xml".equals(contentType)) { - return true; - } - } else if (fileName != null) { - fileName = fileName.toLowerCase(); - if ( - fileName.endsWith(".rdf")) { - return true; - } - } - return false; - } - -} diff --git a/main/src/com/google/gridworks/importers/ReaderImporter.java b/main/src/com/google/gridworks/importers/ReaderImporter.java deleted file mode 100644 index 48982c405..000000000 --- a/main/src/com/google/gridworks/importers/ReaderImporter.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.google.gridworks.importers; - -import java.io.Reader; -import java.util.Properties; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.model.Project; - -/** - * Interface for importers which take a Reader as input. - */ -public interface ReaderImporter extends Importer { - - /** - * Read data from a input reader into project. - * - * @param reader - * reader to import data from. It is assumed to be positioned at - * the correct point and ready to go. 
- * @param project - * project which will contain data - * @param metadata - * metadata of new project - * @param options - * set of properties with import options - * @throws ImportException - */ - public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) - throws ImportException; -} diff --git a/main/src/com/google/gridworks/importers/StreamImporter.java b/main/src/com/google/gridworks/importers/StreamImporter.java deleted file mode 100644 index 1ab0b8bbe..000000000 --- a/main/src/com/google/gridworks/importers/StreamImporter.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.google.gridworks.importers; - -import java.io.InputStream; -import java.util.Properties; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.model.Project; - -public interface StreamImporter extends Importer { - - /** - * @param inputStream stream to be imported - * @param project project to import stream into - * @param metadata metadata of new project - * @param options - * @throws ImportException - */ - public void read(InputStream inputStream, Project project, - ProjectMetadata metadata, Properties options) throws ImportException; - -} diff --git a/main/src/com/google/gridworks/importers/TsvCsvImporter.java b/main/src/com/google/gridworks/importers/TsvCsvImporter.java deleted file mode 100644 index cc75a1b57..000000000 --- a/main/src/com/google/gridworks/importers/TsvCsvImporter.java +++ /dev/null @@ -1,206 +0,0 @@ -package com.google.gridworks.importers; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.LineNumberReader; -import java.io.Reader; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.apache.commons.lang.StringUtils; - -import au.com.bytecode.opencsv.CSVParser; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.expr.ExpressionUtils; -import 
com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class TsvCsvImporter implements ReaderImporter,StreamImporter { - - @Override - public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) throws ImportException { - boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true); - - String sep = options.getProperty("separator"); // auto-detect if not present - int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1); - int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1); - - int limit = ImporterUtilities.getIntegerOption("limit",options,-1); - int skip = ImporterUtilities.getIntegerOption("skip",options,0); - boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true); - boolean ignoreQuotes = ImporterUtilities.getBooleanOption("ignore-quotes", options, false); - - LineNumberReader lnReader = new LineNumberReader(reader); - - try { - read(lnReader, project, sep, - limit, skip, ignoreLines, headerLines, - guessValueType, splitIntoColumns, ignoreQuotes - ); - } catch (IOException e) { - throw new ImportException("Import failed",e); - } - } - - /** - * - * @param lnReader - * LineNumberReader used to read file or string contents - * @param project - * The project into which the parsed data will be added - * @param sep - * The character used to denote different the break between data points - * @param limit - * The maximum number of rows of data to import - * @param skip - * The number of initial data rows to skip - * @param ignoreLines - * The number of initial lines within the data source which should be ignored entirely - * @param headerLines - * The number of lines in the data source which describe each column - * @param guessValueType - * Whether the parser should try and guess the type of the value being parsed - * @param 
splitIntoColumns - * Whether the parser should try and split the data source into columns - * @param ignoreQuotes - * Quotation marks are ignored, and all separators and newlines treated as such regardless of whether they are within quoted values - * @throws IOException - */ - public void read(LineNumberReader lnReader, Project project, String sep, int limit, int skip, int ignoreLines, int headerLines, boolean guessValueType, boolean splitIntoColumns, boolean ignoreQuotes ) throws IOException{ - CSVParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ? - new CSVParser(sep.toCharArray()[0],//HACK changing string to char - won't work for multi-char separators. - CSVParser.DEFAULT_QUOTE_CHARACTER, - CSVParser.DEFAULT_ESCAPE_CHARACTER, - CSVParser.DEFAULT_STRICT_QUOTES, - CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, - ignoreQuotes) : null; - List columnNames = new ArrayList(); - String line = null; - int rowsWithData = 0; - - while ((line = lnReader.readLine()) != null) { - if (ignoreLines > 0) { - ignoreLines--; - continue; - } else if (StringUtils.isBlank(line)) { - continue; - } - - //guess separator - if (parser == null) { - int tab = line.indexOf('\t'); - if (tab >= 0) { - parser = new CSVParser('\t', - CSVParser.DEFAULT_QUOTE_CHARACTER, - CSVParser.DEFAULT_ESCAPE_CHARACTER, - CSVParser.DEFAULT_STRICT_QUOTES, - CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, - ignoreQuotes); - } else { - parser = new CSVParser(',', - CSVParser.DEFAULT_QUOTE_CHARACTER, - CSVParser.DEFAULT_ESCAPE_CHARACTER, - CSVParser.DEFAULT_STRICT_QUOTES, - CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, - ignoreQuotes); - } - } - - - if (headerLines > 0) { - //column headers - headerLines--; - - ArrayList cells = getCells(line, parser, lnReader, splitIntoColumns); - - for (int c = 0; c < cells.size(); c++) { - String cell = cells.get(c).trim(); - //add column even if cell is blank - ImporterUtilities.appendColumnName(columnNames, c, cell); - } - } else { - //data - Row row = new 
Row(columnNames.size()); - - ArrayList cells = getCells(line, parser, lnReader, splitIntoColumns); - - if( cells != null && cells.size() > 0 ) - rowsWithData++; - - if (skip <=0 || rowsWithData > skip){ - //add parsed data to row - for(String s : cells){ - s = s.trim(); - if (ExpressionUtils.isNonBlankData(s)) { - Serializable value = guessValueType ? ImporterUtilities.parseCellValue(s) : s; - row.cells.add(new Cell(value, null)); - }else{ - row.cells.add(null); - } - } - project.rows.add(row); - project.columnModel.setMaxCellIndex(row.cells.size()); - - ImporterUtilities.ensureColumnsInRowExist(columnNames, row); - - if (limit > 0 && project.rows.size() >= limit) { - break; - } - } - } - } - - ImporterUtilities.setupColumns(project, columnNames); - } - - protected ArrayList getCells(String line, CSVParser parser, LineNumberReader lnReader, boolean splitIntoColumns) throws IOException{ - ArrayList cells = new ArrayList(); - if(splitIntoColumns){ - String[] tokens = parser.parseLineMulti(line); - for(String s : tokens){ - cells.add(s); - } - while(parser.isPending()){ - tokens = parser.parseLineMulti(lnReader.readLine()); - for(String s : tokens){ - cells.add(s); - } - } - }else{ - cells.add(line); - } - return cells; - } - - @Override - public void read(InputStream inputStream, Project project, - ProjectMetadata metadata, Properties options) throws ImportException { - read(new InputStreamReader(inputStream), project, metadata, options); - } - - @Override - public boolean canImportData(String contentType, String fileName) { - if (contentType != null) { - contentType = contentType.toLowerCase().trim(); - return - "text/plain".equals(contentType) || - "text/csv".equals(contentType) || - "text/x-csv".equals(contentType) || - "text/tab-separated-value".equals(contentType); - - } else if (fileName != null) { - fileName = fileName.toLowerCase(); - if (fileName.endsWith(".tsv")) { - return true; - }else if (fileName.endsWith(".csv")){ - return true; - } - } - return false; 
- } -} diff --git a/main/src/com/google/gridworks/importers/UrlImporter.java b/main/src/com/google/gridworks/importers/UrlImporter.java deleted file mode 100644 index 5013b2ba6..000000000 --- a/main/src/com/google/gridworks/importers/UrlImporter.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.google.gridworks.importers; - -import java.net.URL; -import java.util.Properties; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.model.Project; - -public interface UrlImporter extends Importer { - - public void read(URL url, Project project, ProjectMetadata metadata, Properties options) throws Exception; - - public boolean canImportData(URL url); - -} diff --git a/main/src/com/google/gridworks/importers/XmlImportUtilities.java b/main/src/com/google/gridworks/importers/XmlImportUtilities.java deleted file mode 100644 index 47d0e7e4c..000000000 --- a/main/src/com/google/gridworks/importers/XmlImportUtilities.java +++ /dev/null @@ -1,633 +0,0 @@ -package com.google.gridworks.importers; - -import java.io.InputStream; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLStreamConstants; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class XmlImportUtilities { - final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities"); - - /** - * An element which holds sub-elements we - * shall import as records - */ - static protected class RecordElementCandidate { - String[] path; - int count; - } - - 
/** - * - * - * - */ - static protected abstract class ImportVertical { - public String name = ""; - public int nonBlankCount; - - abstract void tabulate(); - } - - /** - * A column group describes a branch in tree structured data - */ - static public class ImportColumnGroup extends ImportVertical { - public Map subgroups = new HashMap(); - public Map columns = new HashMap(); - public int nextRowIndex; - - @Override - void tabulate() { - for (ImportColumn c : columns.values()) { - c.tabulate(); - nonBlankCount = Math.max(nonBlankCount, c.nonBlankCount); - } - for (ImportColumnGroup g : subgroups.values()) { - g.tabulate(); - nonBlankCount = Math.max(nonBlankCount, g.nonBlankCount); - } - } - } - - /** - * A column is used to describe a branch-terminating element in a tree structure - * - */ - static public class ImportColumn extends ImportVertical { - public int cellIndex; - public int nextRowIndex; - public boolean blankOnFirstRow; - - public ImportColumn() {} - - public ImportColumn(String name) { //required for testing - super.name = name; - } - - @Override - void tabulate() { - // already done the tabulation elsewhere - } - } - - /** - * A record describes a data element in a tree-structure - * - */ - static public class ImportRecord { - public List> rows = new LinkedList>(); - } - - static public String[] detectPathFromTag(InputStream inputStream, String tag) { - try { - XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); - - while (parser.hasNext()) { - int eventType = parser.next(); - if (eventType == XMLStreamConstants.START_ELEMENT) { - List path = detectRecordElement(parser, tag); - if (path != null) { - String[] path2 = new String[path.size()]; - - path.toArray(path2); - - return path2; - } - } - } - } catch (Exception e) { - // silent - // e.printStackTrace(); - } - - return null; - } - - /** - * Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it. 
- * - * @param parser - * @param tag - * The Xml element name (can be qualified) to search for - * @return - * If the tag is found, an array of strings is returned. - * If the tag is at the top level, the tag will be the only item in the array. - * If the tag is nested beneath the top level, the array is filled with the hierarchy with the tag name at the last index - * Null if the the tag is not found. - * @throws XMLStreamException - */ - static protected List detectRecordElement(XMLStreamReader parser, String tag) throws XMLStreamException { - if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) - parser.next(); - String localName = parser.getLocalName(); - String fullName = composeName(parser.getPrefix(), localName); - if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) { - List path = new LinkedList(); - path.add(localName); - - return path; - } - - while (parser.hasNext()) { - int eventType = parser.next(); - if (eventType == XMLStreamConstants.END_ELEMENT) { - break; - } else if (eventType == XMLStreamConstants.START_ELEMENT) { - List path = detectRecordElement(parser, tag); - if (path != null) { - path.add(0, localName); - return path; - } - } - } - return null; - } - - /** - * Seeks for recurring XML element in an InputStream - * which are likely candidates for being data records - * @param inputStream - * The XML data as a stream - * @return - * The path to the most numerous of the possible candidates. 
- * null if no candidates were found (less than 6 recurrences) - */ - static public String[] detectRecordElement(InputStream inputStream) { - logger.trace("detectRecordElement(inputStream)"); - List candidates = new ArrayList(); - - try { - XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); - - while (parser.hasNext()) { - int eventType = parser.next(); - if (eventType == XMLStreamConstants.START_ELEMENT) { - RecordElementCandidate candidate = - detectRecordElement( - parser, - new String[] { parser.getLocalName() }); - - if (candidate != null) { - candidates.add(candidate); - } - } - } - } catch (Exception e) { - // silent - // e.printStackTrace(); - } - - if (candidates.size() > 0) { - sortRecordElementCandidates(candidates); - - return candidates.get(0).path; - } - logger.info("No candidate elements were found in Xml - at least 6 similar elements are required"); - return null; - } - - static protected RecordElementCandidate detectRecordElement(XMLStreamReader parser, String[] path) { - logger.trace("detectRecordElement(XMLStreamReader, String[])"); - List descendantCandidates = new ArrayList(); - - Map immediateChildCandidateMap = new HashMap(); - int textNodeCount = 0; - int childElementNodeCount = 0; - - try { - while (parser.hasNext()) { - int eventType = parser.next(); - if (eventType == XMLStreamConstants.END_ELEMENT) { - break; - } else if (eventType == XMLStreamConstants.CHARACTERS) { - if (parser.getText().trim().length() > 0) { - textNodeCount++; - } - } else if (eventType == XMLStreamConstants.START_ELEMENT) { - childElementNodeCount++; - - String tagName = parser.getLocalName(); - - immediateChildCandidateMap.put( - tagName, - immediateChildCandidateMap.containsKey(tagName) ? 
- immediateChildCandidateMap.get(tagName) + 1 : 1); - - String[] path2 = new String[path.length + 1]; - System.arraycopy(path, 0, path2, 0, path.length); - path2[path.length] = tagName; - - RecordElementCandidate c = detectRecordElement(parser, path2); - if (c != null) { - descendantCandidates.add(c); - } - } - } - } catch (Exception e) { - // silent - // e.printStackTrace(); - } - - if (textNodeCount > 0 && childElementNodeCount > 0) { - // This is a mixed element - return null; - } - - if (immediateChildCandidateMap.size() > 0) { - List immediateChildCandidates = new ArrayList(immediateChildCandidateMap.size()); - for (Entry entry : immediateChildCandidateMap.entrySet()) { - int count = entry.getValue(); - if (count > 1) { - String[] path2 = new String[path.length + 1]; - System.arraycopy(path, 0, path2, 0, path.length); - path2[path.length] = entry.getKey(); - - RecordElementCandidate candidate = new RecordElementCandidate(); - candidate.path = path2; - candidate.count = count; - immediateChildCandidates.add(candidate); - } - } - - if (immediateChildCandidates.size() > 0 && immediateChildCandidates.size() < 5) { - // There are some promising immediate child elements, but not many, - // that can serve as record elements. 
- - sortRecordElementCandidates(immediateChildCandidates); - - RecordElementCandidate ourCandidate = immediateChildCandidates.get(0); - logger.trace("ourCandidate.count : " + ourCandidate.count + "; immediateChildCandidates.size() : " + immediateChildCandidates.size()); - if (ourCandidate.count / immediateChildCandidates.size() > 5) { - return ourCandidate; - } - - descendantCandidates.add(ourCandidate); - } - } - - if (descendantCandidates.size() > 0) { - sortRecordElementCandidates(descendantCandidates); - - RecordElementCandidate candidate = descendantCandidates.get(0); - if (candidate.count / descendantCandidates.size() > 5) { - return candidate; - } - } - - return null; - } - - static public void sortRecordElementCandidates(List list) { - Collections.sort(list, new Comparator() { - public int compare(RecordElementCandidate o1, RecordElementCandidate o2) { - return o2.count - o1.count; - } - }); - } - - static public void importXml( - InputStream inputStream, - Project project, - String[] recordPath, - ImportColumnGroup rootColumnGroup - ) { - try { - XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); - - while (parser.hasNext()) { - int eventType = parser.next(); - if (eventType == XMLStreamConstants.START_ELEMENT) { - findRecord(project, parser, recordPath, 0, rootColumnGroup); - } - } - } catch (Exception e) { - e.printStackTrace(); - // silent - } - } - - static public void createColumnsFromImport( - Project project, - ImportColumnGroup columnGroup - ) { - int startColumnIndex = project.columnModel.columns.size(); - - List columns = new ArrayList(columnGroup.columns.values()); - Collections.sort(columns, new Comparator() { - public int compare(ImportColumn o1, ImportColumn o2) { - if (o1.blankOnFirstRow != o2.blankOnFirstRow) { - return o1.blankOnFirstRow ? 1 : -1; - } - - int c = o2.nonBlankCount - o1.nonBlankCount; - return c != 0 ? 
c : (o1.name.length() - o2.name.length()); - } - }); - - for (int i = 0; i < columns.size(); i++) { - ImportColumn c = columns.get(i); - - Column column = new com.google.gridworks.model.Column(c.cellIndex, c.name); - project.columnModel.columns.add(column); - } - - List subgroups = new ArrayList(columnGroup.subgroups.values()); - Collections.sort(subgroups, new Comparator() { - public int compare(ImportColumnGroup o1, ImportColumnGroup o2) { - int c = o2.nonBlankCount - o1.nonBlankCount; - return c != 0 ? c : (o1.name.length() - o2.name.length()); - } - }); - - for (ImportColumnGroup g : subgroups) { - createColumnsFromImport(project, g); - } - - int endColumnIndex = project.columnModel.columns.size(); - int span = endColumnIndex - startColumnIndex; - if (span > 1 && span < project.columnModel.columns.size()) { - project.columnModel.addColumnGroup(startColumnIndex, span, startColumnIndex); - } - } - - /** - * - * @param project - * @param parser - * @param recordPath - * @param pathIndex - * @param rootColumnGroup - * @throws XMLStreamException - */ - static protected void findRecord( - Project project, - XMLStreamReader parser, - String[] recordPath, - int pathIndex, - ImportColumnGroup rootColumnGroup - ) throws XMLStreamException { - if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT){ - logger.warn("Cannot use findRecord method for START_DOCUMENT event"); - return; - } - String tagName = parser.getLocalName(); - if (tagName.equals(recordPath[pathIndex])) { - if (pathIndex < recordPath.length - 1) { - while (parser.hasNext()) { - int eventType = parser.next(); - if (eventType == XMLStreamConstants.START_ELEMENT) { - findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup); - } else if (eventType == XMLStreamConstants.END_ELEMENT) { - break; - } - } - } else { - processRecord(project, parser, rootColumnGroup); - } - } else { - skip(parser); - } - } - - static protected void skip(XMLStreamReader parser) throws XMLStreamException { - while 
(parser.hasNext()) { - int eventType = parser.next(); - if (eventType == XMLStreamConstants.START_ELEMENT) { - skip(parser); - } else if (eventType == XMLStreamConstants.END_ELEMENT) { - return; - } - } - } - - /** - * processRecord parsesXml for a single element and it's sub-elements, - * adding the parsed data as a row to the project - * @param project - * @param parser - * @param rootColumnGroup - * @throws XMLStreamException - */ - static protected void processRecord( - Project project, - XMLStreamReader parser, - ImportColumnGroup rootColumnGroup - ) throws XMLStreamException { - ImportRecord record = new ImportRecord(); - - processSubRecord(project, parser, rootColumnGroup, record); - - if (record.rows.size() > 0) { - for (List row : record.rows) { - Row realRow = new Row(row.size()); - int cellCount = 0; - - for (int c = 0; c < row.size(); c++) { - Cell cell = row.get(c); - if (cell != null) { - realRow.setCell(c, cell); - cellCount++; - } - } - - if (cellCount > 0) { - project.rows.add(realRow); - } - } - } - } - - static protected String composeName(String prefix, String localName) { - return prefix != null && prefix.length() > 0 ? 
(prefix + ":" + localName) : localName; - } - - /** - * - * @param project - * @param parser - * @param columnGroup - * @param record - * @throws XMLStreamException - */ - static protected void processSubRecord( - Project project, - XMLStreamReader parser, - ImportColumnGroup columnGroup, - ImportRecord record - ) throws XMLStreamException { - ImportColumnGroup thisColumnGroup = getColumnGroup( - project, - columnGroup, - composeName(parser.getPrefix(), parser.getLocalName())); - - thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex); - - int attributeCount = parser.getAttributeCount(); - for (int i = 0; i < attributeCount; i++) { - String text = parser.getAttributeValue(i).trim(); - if (text.length() > 0) { - addCell( - project, - thisColumnGroup, - record, - composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)), - text - ); - } - } - - while (parser.hasNext()) { - int eventType = parser.next(); - if (eventType == XMLStreamConstants.START_ELEMENT) { - processSubRecord( - project, - parser, - thisColumnGroup, - record - ); - } else if (//eventType == XMLStreamConstants.CDATA || - eventType == XMLStreamConstants.CHARACTERS) { - String text = parser.getText().trim(); - if (text.length() > 0) { - addCell( - project, - thisColumnGroup, - record, - null, - parser.getText() - ); - } - } else if (eventType == XMLStreamConstants.END_ELEMENT) { - break; - } - } - - int nextRowIndex = thisColumnGroup.nextRowIndex; - for (ImportColumn column2 : thisColumnGroup.columns.values()) { - nextRowIndex = Math.max(nextRowIndex, column2.nextRowIndex); - } - for (ImportColumnGroup columnGroup2 : thisColumnGroup.subgroups.values()) { - nextRowIndex = Math.max(nextRowIndex, columnGroup2.nextRowIndex); - } - thisColumnGroup.nextRowIndex = nextRowIndex; - } - - static protected void addCell( - Project project, - ImportColumnGroup columnGroup, - ImportRecord record, - String columnLocalName, - String text - ) { - if (text == 
null || ((String) text).isEmpty()) { - return; - } - - Serializable value = ImporterUtilities.parseCellValue(text); - - ImportColumn column = getColumn(project, columnGroup, columnLocalName); - int cellIndex = column.cellIndex; - - int rowIndex = Math.max(columnGroup.nextRowIndex, column.nextRowIndex); - while (rowIndex >= record.rows.size()) { - record.rows.add(new ArrayList()); - } - - List row = record.rows.get(rowIndex); - while (cellIndex >= row.size()) { - row.add(null); - } - - logger.trace("Adding cell with value : \"" + value + "\" to row : " + rowIndex + " at cell index : " + (cellIndex-1)); - - row.set(cellIndex, new Cell(value, null)); - - column.nextRowIndex = rowIndex + 1; - column.nonBlankCount++; - } - - static protected ImportColumn getColumn( - Project project, - ImportColumnGroup columnGroup, - String localName - ) { - if (columnGroup.columns.containsKey(localName)) { - return columnGroup.columns.get(localName); - } - - ImportColumn column = createColumn(project, columnGroup, localName); - columnGroup.columns.put(localName, column); - - return column; - } - - static protected ImportColumn createColumn( - Project project, - ImportColumnGroup columnGroup, - String localName - ) { - ImportColumn newColumn = new ImportColumn(); - - newColumn.name = - columnGroup.name.length() == 0 ? - (localName == null ? "Text" : localName) : - (localName == null ? 
columnGroup.name : (columnGroup.name + " - " + localName)); - - newColumn.cellIndex = project.columnModel.allocateNewCellIndex(); - newColumn.nextRowIndex = columnGroup.nextRowIndex; - - return newColumn; - } - - static protected ImportColumnGroup getColumnGroup( - Project project, - ImportColumnGroup columnGroup, - String localName - ) { - if (columnGroup.subgroups.containsKey(localName)) { - return columnGroup.subgroups.get(localName); - } - - ImportColumnGroup subgroup = createColumnGroup(project, columnGroup, localName); - columnGroup.subgroups.put(localName, subgroup); - - return subgroup; - } - - static protected ImportColumnGroup createColumnGroup( - Project project, - ImportColumnGroup columnGroup, - String localName - ) { - ImportColumnGroup newGroup = new ImportColumnGroup(); - - newGroup.name = - columnGroup.name.length() == 0 ? - (localName == null ? "Text" : localName) : - (localName == null ? columnGroup.name : (columnGroup.name + " - " + localName)); - - newGroup.nextRowIndex = columnGroup.nextRowIndex; - - return newGroup; - } -} diff --git a/main/src/com/google/gridworks/importers/XmlImporter.java b/main/src/com/google/gridworks/importers/XmlImporter.java deleted file mode 100644 index 3aa7c4893..000000000 --- a/main/src/com/google/gridworks/importers/XmlImporter.java +++ /dev/null @@ -1,91 +0,0 @@ -package com.google.gridworks.importers; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.PushbackInputStream; -import java.util.Properties; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.importers.XmlImportUtilities.ImportColumnGroup; -import com.google.gridworks.model.Project; - -public class XmlImporter implements StreamImporter { - - final static Logger logger = LoggerFactory.getLogger("XmlImporter"); - - public static final int BUFFER_SIZE = 64 * 1024; - - @Override - public void read( - 
InputStream inputStream, - Project project, - ProjectMetadata metadata, Properties options - ) throws ImportException { - logger.trace("XmlImporter.read"); - PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE); - - String[] recordPath = null; - { - byte[] buffer = new byte[BUFFER_SIZE]; - int bytes_read = 0; - try { - while (bytes_read < BUFFER_SIZE) { - int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read); - if (c == -1) break; - bytes_read +=c ; - } - pis.unread(buffer, 0, bytes_read); - } catch (IOException e) { - throw new ImportException("Read error",e); - } - - if (options.containsKey("importer-record-tag")) { - recordPath = XmlImportUtilities.detectPathFromTag( - new ByteArrayInputStream(buffer, 0, bytes_read), - options.getProperty("importer-record-tag")); - } else { - recordPath = XmlImportUtilities.detectRecordElement( - new ByteArrayInputStream(buffer, 0, bytes_read)); - } - } - - if (recordPath == null) - return; - - ImportColumnGroup rootColumnGroup = new ImportColumnGroup(); - - XmlImportUtilities.importXml(pis, project, recordPath, rootColumnGroup); - XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup); - - project.columnModel.update(); - } - - @Override - public boolean canImportData(String contentType, String fileName) { - if (contentType != null) { - contentType = contentType.toLowerCase().trim(); - - if("application/xml".equals(contentType) || - "text/xml".equals(contentType) || - "application/rss+xml".equals(contentType) || - "application/atom+xml".equals(contentType)) { - return true; - } - } else if (fileName != null) { - fileName = fileName.toLowerCase(); - if ( - fileName.endsWith(".xml") || - fileName.endsWith(".atom") || - fileName.endsWith(".rss") - ) { - return true; - } - } - return false; - } - -} diff --git a/main/src/com/google/gridworks/importers/parsers/NonSplitRowParser.java b/main/src/com/google/gridworks/importers/parsers/NonSplitRowParser.java deleted file mode 100644 index 
21c7e77d7..000000000 --- a/main/src/com/google/gridworks/importers/parsers/NonSplitRowParser.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.google.gridworks.importers.parsers; - -import java.io.LineNumberReader; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.importers.ImporterUtilities; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Row; - -public class NonSplitRowParser extends RowParser { - - public List split(String line, LineNumberReader lineReader) { - List results = new ArrayList(1); - - results.add(line.trim()); - - return results; - } - - public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) { - line = line.trim(); - if (line.isEmpty()) { - return false; - } else { - Serializable value = guessValueType ? ImporterUtilities.parseCellValue(line) : line; - if (ExpressionUtils.isNonBlankData(value)) { - row.cells.add(new Cell(value, null)); - return true; - } else { - row.cells.add(null); - return false; - } - } - } - -} diff --git a/main/src/com/google/gridworks/importers/parsers/RowParser.java b/main/src/com/google/gridworks/importers/parsers/RowParser.java deleted file mode 100644 index 01d1b7a96..000000000 --- a/main/src/com/google/gridworks/importers/parsers/RowParser.java +++ /dev/null @@ -1,12 +0,0 @@ -package com.google.gridworks.importers.parsers; - -import java.io.LineNumberReader; -import java.util.List; - -import com.google.gridworks.model.Row; - -public abstract class RowParser { - public abstract List split(String line, LineNumberReader lineReader); - - public abstract boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader); -} diff --git a/main/src/com/google/gridworks/importers/parsers/SeparatorRowParser.java b/main/src/com/google/gridworks/importers/parsers/SeparatorRowParser.java deleted file mode 100644 index 
f24c22e00..000000000 --- a/main/src/com/google/gridworks/importers/parsers/SeparatorRowParser.java +++ /dev/null @@ -1,52 +0,0 @@ -package com.google.gridworks.importers.parsers; - -import java.io.LineNumberReader; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.lang.StringUtils; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.importers.ImporterUtilities; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Row; - -public class SeparatorRowParser extends RowParser { - - String sep; - - public SeparatorRowParser(String sep) { - this.sep = sep; - } - - public List split(String line, LineNumberReader lineReader) { - String[] cells = StringUtils.splitPreserveAllTokens(line, sep); - - List results = new ArrayList(); - for (int c = 0; c < cells.length; c++) { - results.add(cells[c]); - } - - return results; - } - - public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) { - boolean hasData = false; - - String[] cells = StringUtils.splitPreserveAllTokens(line, sep); - for (int c = 0; c < cells.length; c++) { - String text = cells[c]; - - Serializable value = guessValueType ? 
ImporterUtilities.parseCellValue(text) : text; - if (ExpressionUtils.isNonBlankData(value)) { - row.cells.add(new Cell(value, null)); - hasData = true; - } else { - row.cells.add(null); - } - } - return hasData; - } - -} diff --git a/main/src/com/google/gridworks/io/FileHistoryEntryManager.java b/main/src/com/google/gridworks/io/FileHistoryEntryManager.java deleted file mode 100644 index 425b9dc40..000000000 --- a/main/src/com/google/gridworks/io/FileHistoryEntryManager.java +++ /dev/null @@ -1,109 +0,0 @@ -package com.google.gridworks.io; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.InputStreamReader; -import java.util.Properties; -import java.util.zip.ZipEntry; -import java.util.zip.ZipFile; -import java.util.zip.ZipOutputStream; -import java.io.Writer; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.history.History; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.history.HistoryEntryManager; -import com.google.gridworks.util.Pool; - - -public class FileHistoryEntryManager implements HistoryEntryManager{ - - public void delete(HistoryEntry historyEntry) { - File file = getChangeFile(historyEntry); - if (file.exists()) { - file.delete(); - } - } - - public void save(HistoryEntry historyEntry, Writer writer, Properties options) { - JSONWriter jsonWriter = new JSONWriter(writer); - try { - historyEntry.write(jsonWriter, options); - } catch (JSONException e) { - e.printStackTrace(); - } - } - - public void loadChange(HistoryEntry historyEntry) { - File changeFile = getChangeFile(historyEntry); - - try { - loadChange(historyEntry, changeFile); - } catch (Exception e) { - throw new RuntimeException("Failed to load change file " + changeFile.getAbsolutePath(), e); - } - } - - protected void loadChange(HistoryEntry historyEntry, File file) throws Exception { - ZipFile zipFile = new ZipFile(file); - try { - Pool pool = new 
Pool(); - ZipEntry poolEntry = zipFile.getEntry("pool.txt"); - if (poolEntry != null) { - pool.load(new InputStreamReader( - zipFile.getInputStream(poolEntry))); - } // else, it's a legacy project file - - historyEntry.setChange(History.readOneChange( - zipFile.getInputStream(zipFile.getEntry("change.txt")), pool)); - } finally { - zipFile.close(); - } - } - - public void saveChange(HistoryEntry historyEntry) throws Exception { - File changeFile = getChangeFile(historyEntry); - if (!(changeFile.exists())) { - saveChange(historyEntry, changeFile); - } - } - - protected void saveChange(HistoryEntry historyEntry, File file) throws Exception { - ZipOutputStream out = new ZipOutputStream(new FileOutputStream(file)); - try { - Pool pool = new Pool(); - - out.putNextEntry(new ZipEntry("change.txt")); - try { - History.writeOneChange(out, historyEntry.getChange(), pool); - } finally { - out.closeEntry(); - } - - out.putNextEntry(new ZipEntry("pool.txt")); - try { - pool.save(out); - } finally { - out.closeEntry(); - } - } finally { - out.close(); - } - } - - protected File getChangeFile(HistoryEntry historyEntry) { - return new File(getHistoryDir(historyEntry), historyEntry.id + ".change.zip"); - } - - protected File getHistoryDir(HistoryEntry historyEntry) { - File dir = new File(((FileProjectManager)ProjectManager.singleton) - .getProjectDir(historyEntry.projectID), - "history"); - dir.mkdirs(); - - return dir; - } -} \ No newline at end of file diff --git a/main/src/com/google/gridworks/io/FileProjectManager.java b/main/src/com/google/gridworks/io/FileProjectManager.java deleted file mode 100644 index c9b0fb5e3..000000000 --- a/main/src/com/google/gridworks/io/FileProjectManager.java +++ /dev/null @@ -1,367 +0,0 @@ -package com.google.gridworks.io; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStream; -import 
java.io.OutputStream; -import java.util.Properties; -import java.util.zip.GZIPInputStream; - -import org.apache.tools.tar.TarEntry; -import org.apache.tools.tar.TarInputStream; -import org.apache.tools.tar.TarOutputStream; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONTokener; -import org.json.JSONWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.history.HistoryEntryManager; -import com.google.gridworks.model.Project; -import com.google.gridworks.preference.TopList; - -public class FileProjectManager extends ProjectManager { - final static protected String s_projectDirNameSuffix = ".project"; - - protected File _workspaceDir; - - final static Logger logger = LoggerFactory.getLogger("FileProjectManager"); - - static public synchronized void initialize(File dir) { - if (singleton == null) { - logger.info("Using workspace directory: {}", dir.getAbsolutePath()); - singleton = new FileProjectManager(dir); - } - - } - - protected FileProjectManager(File dir) { - super(); - _workspaceDir = dir; - _workspaceDir.mkdirs(); - - load(); - recover(); - } - - public File getWorkspaceDir() { - return _workspaceDir; - } - - static public File getProjectDir(File workspaceDir, long projectID) { - File dir = new File(workspaceDir, projectID + s_projectDirNameSuffix); - if (!dir.exists()) { - dir.mkdir(); - } - return dir; - } - - public File getProjectDir(long projectID) { - return getProjectDir(_workspaceDir, projectID); - } - - /** - * Import an external project that has been received as a .tar file, expanded, and - * copied into our workspace directory. 
- * - * @param projectID - */ - public boolean loadProjectMetadata(long projectID) { - synchronized (this) { - ProjectMetadata metadata = ProjectMetadataUtilities.load(getProjectDir(projectID)); - if (metadata != null) { - _projectsMetadata.put(projectID, metadata); - return true; - } else { - return false; - } - } - } - - public void importProject(long projectID, InputStream inputStream, boolean gziped) throws IOException { - File destDir = this.getProjectDir(projectID); - destDir.mkdirs(); - - if (gziped) { - GZIPInputStream gis = new GZIPInputStream(inputStream); - untar(destDir, gis); - } else { - untar(destDir, inputStream); - } - } - - protected void untar(File destDir, InputStream inputStream) throws IOException { - TarInputStream tin = new TarInputStream(inputStream); - TarEntry tarEntry = null; - - while ((tarEntry = tin.getNextEntry()) != null) { - File destEntry = new File(destDir, tarEntry.getName()); - File parent = destEntry.getParentFile(); - - if (!parent.exists()) { - parent.mkdirs(); - } - - if (tarEntry.isDirectory()) { - destEntry.mkdirs(); - } else { - FileOutputStream fout = new FileOutputStream(destEntry); - try { - tin.copyEntryContents(fout); - } finally { - fout.close(); - } - } - } - } - - public void exportProject(long projectId, TarOutputStream tos) throws IOException { - File dir = this.getProjectDir(projectId); - this.tarDir("", dir, tos); - } - - protected void tarDir(String relative, File dir, TarOutputStream tos) throws IOException{ - File[] files = dir.listFiles(); - for (File file : files) { - if (!file.isHidden()) { - String path = relative + file.getName(); - - if (file.isDirectory()) { - tarDir(path + File.separator, file, tos); - } else { - TarEntry entry = new TarEntry(path); - - entry.setMode(TarEntry.DEFAULT_FILE_MODE); - entry.setSize(file.length()); - entry.setModTime(file.lastModified()); - - tos.putNextEntry(entry); - - copyFile(file, tos); - - tos.closeEntry(); - } - } - } - } - - protected void copyFile(File file, 
OutputStream os) throws IOException { - final int buffersize = 4096; - - FileInputStream fis = new FileInputStream(file); - try { - byte[] buf = new byte[buffersize]; - int count; - - while((count = fis.read(buf, 0, buffersize)) != -1) { - os.write(buf, 0, count); - } - } finally { - fis.close(); - } - } - - @Override - protected void saveMetadata(ProjectMetadata metadata, long projectId) throws Exception { - File projectDir = getProjectDir(projectId); - ProjectMetadataUtilities.save(metadata, projectDir); - } - - @Override - protected void saveProject(Project project){ - ProjectUtilities.save(project); - } - - public Project loadProject(long id) { - return ProjectUtilities.load(getProjectDir(id), id); - } - - - - /** - * Save the workspace's data out to file in a safe way: save to a temporary file first - * and rename it to the real file. - */ - @Override - protected void saveWorkspace() { - synchronized (this) { - File tempFile = new File(_workspaceDir, "workspace.temp.json"); - try { - saveToFile(tempFile); - } catch (Exception e) { - e.printStackTrace(); - - logger.warn("Failed to save workspace"); - return; - } - - File file = new File(_workspaceDir, "workspace.json"); - File oldFile = new File(_workspaceDir, "workspace.old.json"); - - if (file.exists()) { - file.renameTo(oldFile); - } - - tempFile.renameTo(file); - if (oldFile.exists()) { - oldFile.delete(); - } - - logger.info("Saved workspace"); - } - } - - protected void saveToFile(File file) throws IOException, JSONException { - FileWriter writer = new FileWriter(file); - try { - JSONWriter jsonWriter = new JSONWriter(writer); - jsonWriter.object(); - jsonWriter.key("projectIDs"); - jsonWriter.array(); - for (Long id : _projectsMetadata.keySet()) { - ProjectMetadata metadata = _projectsMetadata.get(id); - if (metadata != null) { - jsonWriter.value(id); - - try { - ProjectMetadataUtilities.save(metadata, getProjectDir(id)); - } catch (Exception e) { - e.printStackTrace(); - } - } - } - 
jsonWriter.endArray(); - writer.write('\n'); - - jsonWriter.key("preferences"); - _preferenceStore.write(jsonWriter, new Properties()); - - jsonWriter.endObject(); - } finally { - writer.close(); - } - } - - - - public void deleteProject(long projectID) { - synchronized (this) { - removeProject(projectID); - - File dir = getProjectDir(projectID); - if (dir.exists()) { - deleteDir(dir); - } - } - - saveWorkspace(); - } - - static protected void deleteDir(File dir) { - for (File file : dir.listFiles()) { - if (file.isDirectory()) { - deleteDir(file); - } else { - file.delete(); - } - } - dir.delete(); - } - - protected void load() { - if (loadFromFile(new File(_workspaceDir, "workspace.json"))) return; - if (loadFromFile(new File(_workspaceDir, "workspace.temp.json"))) return; - if (loadFromFile(new File(_workspaceDir, "workspace.old.json"))) return; - } - - protected boolean loadFromFile(File file) { - logger.info("Loading workspace: {}", file.getAbsolutePath()); - - _projectsMetadata.clear(); - - boolean found = false; - - if (file.exists() || file.canRead()) { - FileReader reader = null; - try { - reader = new FileReader(file); - JSONTokener tokener = new JSONTokener(reader); - JSONObject obj = (JSONObject) tokener.nextValue(); - - JSONArray a = obj.getJSONArray("projectIDs"); - int count = a.length(); - for (int i = 0; i < count; i++) { - long id = a.getLong(i); - - File projectDir = getProjectDir(id); - ProjectMetadata metadata = ProjectMetadataUtilities.load(projectDir); - - _projectsMetadata.put(id, metadata); - } - - if (obj.has("preferences") && !obj.isNull("preferences")) { - _preferenceStore.load(obj.getJSONObject("preferences")); - } - - if (obj.has("expressions") && !obj.isNull("expressions")) { // backward compatibility - ((TopList) _preferenceStore.get("scripting.expressions")) - .load(obj.getJSONArray("expressions")); - } - - found = true; - } catch (JSONException e) { - logger.warn("Error reading file", e); - } catch (IOException e) { - 
logger.warn("Error reading file", e); - } finally { - try { - reader.close(); - } catch (IOException e) { - logger.warn("Exception closing file",e); - } - } - } - - return found; - } - - protected void recover() { - for (File file : _workspaceDir.listFiles()) { - if (file.isDirectory() && !file.isHidden()) { - String name = file.getName(); - if (file.getName().endsWith(s_projectDirNameSuffix)) { - String idString = name.substring(0, name.length() - s_projectDirNameSuffix.length()); - long id = -1; - try { - id = Long.parseLong(idString); - } catch (NumberFormatException e) { - // ignore - } - - if (id > 0 && !_projectsMetadata.containsKey(id)) { - if (loadProjectMetadata(id)) { - logger.info( - "Recovered project named " + - getProjectMetadata(id).getName() + - " in directory " + name); - } else { - logger.warn("Failed to recover project in directory " + name); - } - } - } - } - } - } - - public HistoryEntryManager getHistoryEntryManager(){ - return new FileHistoryEntryManager(); - } -} diff --git a/main/src/com/google/gridworks/io/ProjectMetadataUtilities.java b/main/src/com/google/gridworks/io/ProjectMetadataUtilities.java deleted file mode 100644 index bcb0cdeff..000000000 --- a/main/src/com/google/gridworks/io/ProjectMetadataUtilities.java +++ /dev/null @@ -1,85 +0,0 @@ -package com.google.gridworks.io; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.OutputStreamWriter; -import java.io.Writer; - -import org.json.JSONObject; -import org.json.JSONTokener; -import org.json.JSONWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.ProjectMetadata; - - -public class ProjectMetadataUtilities { - final static Logger logger = LoggerFactory.getLogger("project_metadata_utilities"); - - public static void save(ProjectMetadata projectMeta, File projectDir) throws Exception { - File tempFile = new File(projectDir, "metadata.temp.json"); - try { - saveToFile(projectMeta, 
tempFile); - } catch (Exception e) { - e.printStackTrace(); - - logger.warn("Failed to save project metadata"); - return; - } - - File file = new File(projectDir, "metadata.json"); - File oldFile = new File(projectDir, "metadata.old.json"); - - if (file.exists()) { - file.renameTo(oldFile); - } - - tempFile.renameTo(file); - if (oldFile.exists()) { - oldFile.delete(); - } - } - - protected static void saveToFile(ProjectMetadata projectMeta, File metadataFile) throws Exception { - Writer writer = new OutputStreamWriter(new FileOutputStream(metadataFile)); - try { - JSONWriter jsonWriter = new JSONWriter(writer); - projectMeta.write(jsonWriter); - } finally { - writer.close(); - } - } - - static public ProjectMetadata load(File projectDir) { - try { - return loadFromFile(new File(projectDir, "metadata.json")); - } catch (Exception e) { - } - - try { - return loadFromFile(new File(projectDir, "metadata.temp.json")); - } catch (Exception e) { - } - - try { - return loadFromFile(new File(projectDir, "metadata.old.json")); - } catch (Exception e) { - } - - return null; - } - - static protected ProjectMetadata loadFromFile(File metadataFile) throws Exception { - FileReader reader = new FileReader(metadataFile); - try { - JSONTokener tokener = new JSONTokener(reader); - JSONObject obj = (JSONObject) tokener.nextValue(); - - return ProjectMetadata.loadFromJSON(obj); - } finally { - reader.close(); - } - } -} diff --git a/main/src/com/google/gridworks/io/ProjectUtilities.java b/main/src/com/google/gridworks/io/ProjectUtilities.java deleted file mode 100644 index f4db138bc..000000000 --- a/main/src/com/google/gridworks/io/ProjectUtilities.java +++ /dev/null @@ -1,134 +0,0 @@ -package com.google.gridworks.io; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.InputStreamReader; -import java.io.LineNumberReader; -import java.util.zip.ZipEntry; -import java.util.zip.ZipFile; -import java.util.zip.ZipOutputStream; - -import org.slf4j.Logger; -import 
org.slf4j.LoggerFactory; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.Pool; - - -public class ProjectUtilities { - final static Logger logger = LoggerFactory.getLogger("project_utilities"); - - synchronized public static void save(Project project) { - synchronized (project) { - long id = project.id; - File dir = ((FileProjectManager)ProjectManager.singleton).getProjectDir(id); - - File tempFile = new File(dir, "data.temp.zip"); - try { - saveToFile(project, tempFile); - } catch (Exception e) { - e.printStackTrace(); - - logger.warn("Failed to save project {}", id); - return; - } - - File file = new File(dir, "data.zip"); - File oldFile = new File(dir, "data.old.zip"); - - if (file.exists()) { - file.renameTo(oldFile); - } - - tempFile.renameTo(file); - if (oldFile.exists()) { - oldFile.delete(); - } - - project.setLastSave(); - - logger.info("Saved project '{}'",id); - } - } - - protected static void saveToFile(Project project, File file) throws Exception { - ZipOutputStream out = new ZipOutputStream(new FileOutputStream(file)); - try { - Pool pool = new Pool(); - - out.putNextEntry(new ZipEntry("data.txt")); - try { - project.saveToOutputStream(out, pool); - } finally { - out.closeEntry(); - } - - out.putNextEntry(new ZipEntry("pool.txt")); - try { - pool.save(out); - } finally { - out.closeEntry(); - } - } finally { - out.close(); - } - } - - static public Project load(File dir, long id) { - try { - File file = new File(dir, "data.zip"); - if (file.exists()) { - return loadFromFile(file, id); - } - } catch (Exception e) { - e.printStackTrace(); - } - - try { - File file = new File(dir, "data.temp.zip"); - if (file.exists()) { - return loadFromFile(file, id); - } - } catch (Exception e) { - e.printStackTrace(); - } - - try { - File file = new File(dir, "data.old.zip"); - if (file.exists()) { - return loadFromFile(file, id); - } - } catch (Exception e) { - e.printStackTrace(); - } - - 
return null; - } - - static protected Project loadFromFile( - File file, - long id - ) throws Exception { - ZipFile zipFile = new ZipFile(file); - try { - Pool pool = new Pool(); - ZipEntry poolEntry = zipFile.getEntry("pool.txt"); - if (poolEntry != null) { - pool.load(new InputStreamReader( - zipFile.getInputStream(poolEntry))); - } // else, it's a legacy project file - - return Project.loadFromReader( - new LineNumberReader( - new InputStreamReader( - zipFile.getInputStream( - zipFile.getEntry("data.txt")))), - id, - pool - ); - } finally { - zipFile.close(); - } - } -} diff --git a/main/src/com/google/gridworks/logging/IndentingLayout.java b/main/src/com/google/gridworks/logging/IndentingLayout.java deleted file mode 100644 index 7b3776b37..000000000 --- a/main/src/com/google/gridworks/logging/IndentingLayout.java +++ /dev/null @@ -1,143 +0,0 @@ -package com.google.gridworks.logging; - -/* - * Copyright (c) Massachusetts Institute of Technology, 2007 - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Original code: http://simile.mit.edu/repository/tracer/trunk/ - */ - -import java.util.Calendar; -import java.util.Date; - -import org.apache.log4j.Layout; -import org.apache.log4j.spi.LoggingEvent; - -/** - * This is a special Log4j log formatter that is capable of reacting on special log messages - * and 'indent' the logs accordingly. This is very useful to visually inspect a debug log - * and see what calls what. An example of logs are "> method()" and "< method()" where > and < - * are used to indicate respectively "entering" and "exiting". 
- */ -public class IndentingLayout extends Layout { - - protected static final int CONTEXT_SIZE = 25; - protected static final long MAX_DELTA = 10000; - - protected Calendar calendar = Calendar.getInstance(); - protected long previousTime = 0; - protected int indentation = 0; - - public void activateOptions() { - // no options at this time - } - - public String format(LoggingEvent event) { - String message = event.getRenderedMessage(); - if (message == null) return ""; - if (message.length() < 2) return message; - - char leader = message.charAt(0); - char secondLeader = message.charAt(1); - if ((leader == '<') && (secondLeader == ' ') && (this.indentation > 0)) this.indentation--; - - // Reset buf - StringBuffer buf = new StringBuffer(256); - - Date date = new Date(); - long now = date.getTime(); - calendar.setTime(date); - - long delta = 0; - if (previousTime > 0) { - delta = now - previousTime; - } - previousTime = now; - -// if ((previousTime == 0) || (delta > MAX_DELTA)) { -// buf.append('\n'); -// indentation = 0; // reset indentation after a while, as we might -// // have runaway/unmatched log entries -// } - - int hour = calendar.get(Calendar.HOUR_OF_DAY); - if (hour < 10) buf.append('0'); - buf.append(hour); - buf.append(':'); - - int mins = calendar.get(Calendar.MINUTE); - if (mins < 10) buf.append('0'); - buf.append(mins); - buf.append(':'); - - int secs = calendar.get(Calendar.SECOND); - if (secs < 10) buf.append('0'); - buf.append(secs); - buf.append('.'); - - int millis = (int) (now % 1000); - if (millis < 100) buf.append('0'); - if (millis < 10) buf.append('0'); - buf.append(millis); - - buf.append(" ["); - String context = ((String) event.getMDC("LogEvent")); - if (context == null) { - context = event.getLoggerName(); - } - if (context.length() < CONTEXT_SIZE) { - pad(buf, CONTEXT_SIZE - context.length(), ' '); - buf.append(context); - } else { - buf.append(".."); - buf.append(context.substring(context.length() - CONTEXT_SIZE + 2)); - } - 
buf.append("] "); - - pad(buf, indentation, ' '); - - buf.append(message); - - buf.append(" ("); - buf.append(delta); - buf.append("ms)\n"); - - if ((leader == '>') && (secondLeader == ' ')) indentation++; - - return buf.toString(); - } - - private void pad(StringBuffer buffer, int pads, char padchar) { - for (int i = 0; i < pads; i++) { - buffer.append(padchar); - } - } - - public boolean ignoresThrowable() { - return true; - } -} diff --git a/main/src/com/google/gridworks/model/AbstractOperation.java b/main/src/com/google/gridworks/model/AbstractOperation.java deleted file mode 100644 index af0c0406e..000000000 --- a/main/src/com/google/gridworks/model/AbstractOperation.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.google.gridworks.model; - -import java.util.Properties; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.process.Process; -import com.google.gridworks.process.QuickHistoryEntryProcess; - -/* - * An abstract operation can be applied to different but similar - * projects. 
- */ -abstract public class AbstractOperation implements Jsonizable { - public Process createProcess(Project project, Properties options) throws Exception { - return new QuickHistoryEntryProcess(project, getBriefDescription(null)) { - @Override - protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception { - return AbstractOperation.this.createHistoryEntry(_project, historyEntryID); - } - }; - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - throw new UnsupportedOperationException(); - } - - protected String getBriefDescription(Project project) { - throw new UnsupportedOperationException(); - } -} diff --git a/main/src/com/google/gridworks/model/Cell.java b/main/src/com/google/gridworks/model/Cell.java deleted file mode 100644 index eb7269c49..000000000 --- a/main/src/com/google/gridworks/model/Cell.java +++ /dev/null @@ -1,149 +0,0 @@ -package com.google.gridworks.model; - -import java.io.Serializable; -import java.io.Writer; -import java.util.Calendar; -import java.util.Date; -import java.util.Properties; - -import org.codehaus.jackson.JsonFactory; -import org.codehaus.jackson.JsonParser; -import org.codehaus.jackson.JsonToken; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.expr.HasFields; -import com.google.gridworks.util.ParsingUtilities; -import com.google.gridworks.util.Pool; - -public class Cell implements HasFields, Jsonizable { - final public Serializable value; - final public Recon recon; - - public Cell(Serializable value, Recon recon) { - this.value = value; - this.recon = recon; - } - - public Object getField(String name, Properties bindings) { - if ("value".equals(name)) { - return value; - } else if ("recon".equals(name)) { - return recon; - } - return null; - } - - public boolean 
fieldAlsoHasFields(String name) { - return "recon".equals(name); - } - - public void write(JSONWriter writer, Properties options) throws JSONException { - writer.object(); - if (ExpressionUtils.isError(value)) { - writer.key("e"); - writer.value(((EvalError) value).message); - } else { - writer.key("v"); - if (value != null) { - if (value instanceof Calendar) { - writer.value(ParsingUtilities.dateToString(((Calendar) value).getTime())); - writer.key("t"); writer.value("date"); - } else if (value instanceof Date) { - writer.value(ParsingUtilities.dateToString((Date) value)); - writer.key("t"); writer.value("date"); - } else { - writer.value(value); - } - } else { - writer.value(null); - } - } - - if (recon != null) { - writer.key("r"); - writer.value(Long.toString(recon.id)); - - Pool pool = (Pool) options.get("pool"); - pool.pool(recon); - } - writer.endObject(); - } - - public void save(Writer writer, Properties options) { - JSONWriter jsonWriter = new JSONWriter(writer); - try { - write(jsonWriter, options); - } catch (JSONException e) { - e.printStackTrace(); - } - } - - static public Cell loadStreaming(String s, Pool pool) throws Exception { - JsonFactory jsonFactory = new JsonFactory(); - JsonParser jp = jsonFactory.createJsonParser(s); - - if (jp.nextToken() != JsonToken.START_OBJECT) { - return null; - } - - return loadStreaming(jp, pool); - } - - static public Cell loadStreaming(JsonParser jp, Pool pool) throws Exception { - JsonToken t = jp.getCurrentToken(); - if (t == JsonToken.VALUE_NULL || t != JsonToken.START_OBJECT) { - return null; - } - - Serializable value = null; - String type = null; - Recon recon = null; - - while (jp.nextToken() != JsonToken.END_OBJECT) { - String fieldName = jp.getCurrentName(); - jp.nextToken(); - - if ("r".equals(fieldName)) { - if (jp.getCurrentToken() == JsonToken.VALUE_STRING) { - String reconID = jp.getText(); - - recon = pool.getRecon(reconID); - } else { - // legacy - recon = Recon.loadStreaming(jp, pool); - } - } 
else if ("e".equals(fieldName)) { - value = new EvalError(jp.getText()); - } else if ("v".equals(fieldName)) { - JsonToken token = jp.getCurrentToken(); - - if (token == JsonToken.VALUE_STRING) { - value = jp.getText(); - } else if (token == JsonToken.VALUE_NUMBER_INT) { - value = jp.getLongValue(); - } else if (token == JsonToken.VALUE_NUMBER_FLOAT) { - value = jp.getDoubleValue(); - } else if (token == JsonToken.VALUE_TRUE) { - value = true; - } else if (token == JsonToken.VALUE_FALSE) { - value = false; - } - } else if ("t".equals(fieldName)) { - type = jp.getText(); - } - } - - if (value != null) { - if (type != null && "date".equals(type)) { - value = ParsingUtilities.stringToDate((String) value); - } - return new Cell(value, recon); - } else { - return null; - } - } -} diff --git a/main/src/com/google/gridworks/model/Column.java b/main/src/com/google/gridworks/model/Column.java deleted file mode 100644 index 5d3f27813..000000000 --- a/main/src/com/google/gridworks/model/Column.java +++ /dev/null @@ -1,123 +0,0 @@ -package com.google.gridworks.model; - -import java.io.Writer; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.model.recon.ReconConfig; -import com.google.gridworks.util.ParsingUtilities; - -public class Column implements Jsonizable { - final private int _cellIndex; - final private String _originalName; - private String _name; - private ReconConfig _reconConfig; - private ReconStats _reconStats; - - transient protected Map _precomputes; - - public Column(int cellIndex, String originalName) { - _cellIndex = cellIndex; - _originalName = _name = originalName; - } - - public int getCellIndex() { - return _cellIndex; - } - - public String getOriginalHeaderLabel() { - return _originalName; - } - - public void setName(String name) { - this._name = name; - } - - 
public String getName() { - return _name; - } - - public void setReconConfig(ReconConfig config) { - this._reconConfig = config; - } - - public ReconConfig getReconConfig() { - return _reconConfig; - } - - public void setReconStats(ReconStats stats) { - this._reconStats = stats; - } - - public ReconStats getReconStats() { - return _reconStats; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("cellIndex"); writer.value(_cellIndex); - writer.key("originalName"); writer.value(_originalName); - writer.key("name"); writer.value(_name); - if (_reconConfig != null) { - writer.key("reconConfig"); - _reconConfig.write(writer, options); - } - if (_reconStats != null) { - writer.key("reconStats"); - _reconStats.write(writer, options); - } - writer.endObject(); - } - - public void clearPrecomputes() { - if (_precomputes != null) { - _precomputes.clear(); - } - } - - public Object getPrecompute(String key) { - if (_precomputes != null) { - return _precomputes.get(key); - } - return null; - } - - public void setPrecompute(String key, Object value) { - if (_precomputes == null) { - _precomputes = new HashMap(); - } - _precomputes.put(key, value); - } - - public void save(Writer writer) { - JSONWriter jsonWriter = new JSONWriter(writer); - try { - write(jsonWriter, new Properties()); - } catch (JSONException e) { - e.printStackTrace(); - } - } - - static public Column load(String s) throws Exception { - JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(s); - Column column = new Column(obj.getInt("cellIndex"), obj.getString("originalName")); - - column._name = obj.getString("name"); - if (obj.has("reconConfig")) { - column._reconConfig = ReconConfig.reconstruct(obj.getJSONObject("reconConfig")); - } - if (obj.has("reconStats")) { - column._reconStats = ReconStats.load(obj.getJSONObject("reconStats")); - } - - return column; - } -} diff --git a/main/src/com/google/gridworks/model/ColumnGroup.java 
b/main/src/com/google/gridworks/model/ColumnGroup.java deleted file mode 100644 index d1b90b13c..000000000 --- a/main/src/com/google/gridworks/model/ColumnGroup.java +++ /dev/null @@ -1,76 +0,0 @@ -package com.google.gridworks.model; - -import java.io.Writer; -import java.util.LinkedList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.util.ParsingUtilities; - -public class ColumnGroup implements Jsonizable { - final public int startColumnIndex; - final public int columnSpan; - final public int keyColumnIndex; // could be -1 if there is no key cell - - transient public ColumnGroup parentGroup; - transient public List subgroups; - - public ColumnGroup(int startColumnIndex, int columnSpan, int keyColumnIndex) { - this.startColumnIndex = startColumnIndex; - this.columnSpan = columnSpan; - this.keyColumnIndex = keyColumnIndex; - internalInitialize(); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - - writer.key("startColumnIndex"); writer.value(startColumnIndex); - writer.key("columnSpan"); writer.value(columnSpan); - writer.key("keyColumnIndex"); writer.value(keyColumnIndex); - - if (!"save".equals(options.get("mode")) && (subgroups != null) && (subgroups.size() > 0)) { - writer.key("subgroups"); writer.array(); - for (ColumnGroup g : subgroups) { - g.write(writer, options); - } - writer.endArray(); - } - writer.endObject(); - } - - public boolean contains(ColumnGroup g) { - return (g.startColumnIndex >= startColumnIndex && - g.startColumnIndex < startColumnIndex + columnSpan); - } - - public void save(Writer writer) { - JSONWriter jsonWriter = new JSONWriter(writer); - try { - write(jsonWriter, new Properties()); - } catch (JSONException e) { - e.printStackTrace(); - } - } - - static public ColumnGroup load(String s) throws Exception { - 
JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(s); - - return new ColumnGroup( - obj.getInt("startColumnIndex"), - obj.getInt("columnSpan"), - obj.getInt("keyColumnIndex") - ); - } - - protected void internalInitialize() { - subgroups = new LinkedList(); - } -} diff --git a/main/src/com/google/gridworks/model/ColumnModel.java b/main/src/com/google/gridworks/model/ColumnModel.java deleted file mode 100644 index ab97c7e8e..000000000 --- a/main/src/com/google/gridworks/model/ColumnModel.java +++ /dev/null @@ -1,244 +0,0 @@ -package com.google.gridworks.model; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; - -public class ColumnModel implements Jsonizable { - final public List columns = new LinkedList(); - final public List columnGroups = new LinkedList(); - - private int _maxCellIndex; - private int _keyColumnIndex; - - transient protected Map _nameToColumn; - transient protected Map _cellIndexToColumn; - transient protected List _rootColumnGroups; - transient protected List _columnNames; - - public ColumnModel() { - internalInitialize(); - } - - synchronized public void setMaxCellIndex(int maxCellIndex) { - this._maxCellIndex = Math.max(this._maxCellIndex, maxCellIndex); - } - - public int getMaxCellIndex() { - return _maxCellIndex; - } - - synchronized public int allocateNewCellIndex() { - return ++_maxCellIndex; - } - - public void setKeyColumnIndex(int keyColumnIndex) { - // TODO: check validity of new cell index, e.g., it's not in any group - this._keyColumnIndex = keyColumnIndex; - } - - public int getKeyColumnIndex() { - return _keyColumnIndex; - } - - synchronized public void 
addColumnGroup(int startColumnIndex, int span, int keyColumnIndex) { - for (ColumnGroup g : columnGroups) { - if (g.startColumnIndex == startColumnIndex && g.columnSpan == span) { - if (g.keyColumnIndex == keyColumnIndex) { - return; - } else { - columnGroups.remove(g); - break; - } - } - } - - ColumnGroup cg = new ColumnGroup(startColumnIndex, span, keyColumnIndex); - - columnGroups.add(cg); - - } - - public void update() { - internalInitialize(); - } - - synchronized public void addColumn(int index, Column column, boolean avoidNameCollision) throws ModelException { - String baseName = column.getName(); - - if (_nameToColumn.containsKey(baseName)) { - if (!avoidNameCollision) { - throw new ModelException("Duplicated column name"); - } - } - - String name = baseName; - int i = 1; - while (true) { - if (_nameToColumn.containsKey(name)) { - i++; - name = baseName + i; - } else { - break; - } - } - - column.setName(name); - columns.add(index < 0 ? columns.size() : index, column); - _nameToColumn.put(name, column); // so the next call can check - } - - synchronized public Column getColumnByName(String name) { - return _nameToColumn.get(name); - } - - synchronized public int getColumnIndexByName(String name) { - for (int i = 0; i < _columnNames.size(); i++) { - String s = _columnNames.get(i); - if (name.equals(s)) { - return i; - } - } - return -1; - } - - synchronized public Column getColumnByCellIndex(int cellIndex) { - return _cellIndexToColumn.get(cellIndex); - } - - synchronized public List getColumnNames() { - return _columnNames; - } - - synchronized public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - - writer.key("columns"); - writer.array(); - for (Column column : columns) { - column.write(writer, options); - } - writer.endArray(); - - if (columns.size() > 0) { - writer.key("keyCellIndex"); writer.value(getKeyColumnIndex()); - writer.key("keyColumnName"); writer.value(columns.get(_keyColumnIndex).getName()); 
- } - - writer.key("columnGroups"); - writer.array(); - for (ColumnGroup g : _rootColumnGroups) { - g.write(writer, options); - } - writer.endArray(); - - writer.endObject(); - } - - synchronized public void save(Writer writer, Properties options) throws IOException { - writer.write("maxCellIndex="); writer.write(Integer.toString(_maxCellIndex)); writer.write('\n'); - writer.write("keyColumnIndex="); writer.write(Integer.toString(_keyColumnIndex)); writer.write('\n'); - - writer.write("columnCount="); writer.write(Integer.toString(columns.size())); writer.write('\n'); - for (Column column : columns) { - column.save(writer); writer.write('\n'); - } - - writer.write("columnGroupCount="); writer.write(Integer.toString(columnGroups.size())); writer.write('\n'); - for (ColumnGroup group : columnGroups) { - group.save(writer); writer.write('\n'); - } - - writer.write("/e/\n"); - } - - synchronized public void load(LineNumberReader reader) throws Exception { - String line; - while ((line = reader.readLine()) != null && !"/e/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if ("maxCellIndex".equals(field)) { - _maxCellIndex = Integer.parseInt(value); - } else if ("keyColumnIndex".equals(field)) { - _keyColumnIndex = Integer.parseInt(value); - } else if ("columnCount".equals(field)) { - int count = Integer.parseInt(value); - - for (int i = 0; i < count; i++) { - columns.add(Column.load(reader.readLine())); - } - } else if ("columnGroupCount".equals(field)) { - int count = Integer.parseInt(value); - - for (int i = 0; i < count; i++) { - columnGroups.add(ColumnGroup.load(reader.readLine())); - } - } - } - - internalInitialize(); - } - - synchronized protected void internalInitialize() { - generateMaps(); - - // Turn the flat list of column groups into a tree - - _rootColumnGroups = new LinkedList(columnGroups); - Collections.sort(_rootColumnGroups, new Comparator() { - public 
int compare(ColumnGroup o1, ColumnGroup o2) { - int firstDiff = o1.startColumnIndex - o2.startColumnIndex; - return firstDiff != 0 ? - firstDiff : // whichever group that starts first goes first - (o2.columnSpan - o1.columnSpan); // otherwise, the larger group goes first - } - }); - - for (int i = _rootColumnGroups.size() - 1; i >= 0; i--) { - ColumnGroup g = _rootColumnGroups.get(i); - - for (int j = i + 1; j < _rootColumnGroups.size(); j++) { - ColumnGroup g2 = _rootColumnGroups.get(j); - if (g2.parentGroup == null && g.contains(g2)) { - g2.parentGroup = g; - g.subgroups.add(g2); - } - } - } - - for (int i = _rootColumnGroups.size() - 1; i >= 0; i--) { - if (_rootColumnGroups.get(i).parentGroup != null) { - _rootColumnGroups.remove(i); - } - } - } - - protected void generateMaps() { - _nameToColumn = new HashMap(); - _cellIndexToColumn = new HashMap(); - _columnNames = new ArrayList(); - - for (Column column : columns) { - _nameToColumn.put(column.getName(), column); - _cellIndexToColumn.put(column.getCellIndex(), column); - _columnNames.add(column.getName()); - } - } -} diff --git a/main/src/com/google/gridworks/model/ModelException.java b/main/src/com/google/gridworks/model/ModelException.java deleted file mode 100644 index 4a946ac7c..000000000 --- a/main/src/com/google/gridworks/model/ModelException.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.google.gridworks.model; - -public class ModelException extends Exception { - private static final long serialVersionUID = -168448967638065467L; - - public ModelException() { - // TODO Auto-generated constructor stub - } - - public ModelException(String message) { - super(message); - // TODO Auto-generated constructor stub - } - - public ModelException(Throwable cause) { - super(cause); - // TODO Auto-generated constructor stub - } - - public ModelException(String message, Throwable cause) { - super(message, cause); - // TODO Auto-generated constructor stub - } - -} diff --git 
a/main/src/com/google/gridworks/model/OverlayModel.java b/main/src/com/google/gridworks/model/OverlayModel.java deleted file mode 100644 index d933a750b..000000000 --- a/main/src/com/google/gridworks/model/OverlayModel.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.google.gridworks.model; - -import com.google.gridworks.Jsonizable; - -public interface OverlayModel extends Jsonizable { - public void onBeforeSave(); - - public void onAfterSave(); - - public void dispose(); -} diff --git a/main/src/com/google/gridworks/model/Project.java b/main/src/com/google/gridworks/model/Project.java deleted file mode 100644 index d3070e38e..000000000 --- a/main/src/com/google/gridworks/model/Project.java +++ /dev/null @@ -1,230 +0,0 @@ -package com.google.gridworks.model; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.gridworks.GridworksServlet; -import com.google.gridworks.ProjectManager; -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.history.History; -import com.google.gridworks.process.ProcessManager; -import com.google.gridworks.protograph.Protograph; -import com.google.gridworks.util.ParsingUtilities; -import com.google.gridworks.util.Pool; - -public class Project { - final static protected Map> - s_overlayModelClasses = new HashMap>(); - - static public void registerOverlayModel(String modelName, Class klass) { - s_overlayModelClasses.put(modelName, klass); - } - - static { - registerOverlayModel("freebaseProtograph", Protograph.class); - } - - final public long id; - final public 
List rows = new ArrayList(); - - final public ColumnModel columnModel = new ColumnModel(); - final public RecordModel recordModel = new RecordModel(); - final public Map overlayModels = new HashMap(); - - final public History history; - - transient public ProcessManager processManager = new ProcessManager(); - transient private Date _lastSave = new Date(); - - final static Logger logger = LoggerFactory.getLogger("project"); - - static public long generateID() { - return System.currentTimeMillis() + Math.round(Math.random() * 1000000000000L); - } - - public Project() { - id = generateID(); - history = new History(this); - } - - protected Project(long id) { - this.id = id; - this.history = new History(this); - } - - public void dispose() { - for (OverlayModel overlayModel : overlayModels.values()) { - try { - overlayModel.dispose(); - } catch (Exception e) { - logger.warn("Error signaling overlay model before disposing", e); - } - } - } - - public Date getLastSave(){ - return this._lastSave; - } - /** - * Sets the lastSave time to now - */ - public void setLastSave(){ - this._lastSave = new Date(); - } - - public ProjectMetadata getMetadata() { - return ProjectManager.singleton.getProjectMetadata(id); - } - - public void saveToOutputStream(OutputStream out, Pool pool) throws IOException { - for (OverlayModel overlayModel : overlayModels.values()) { - try { - overlayModel.onBeforeSave(); - } catch (Exception e) { - logger.warn("Error signaling overlay model before saving", e); - } - } - - Writer writer = new OutputStreamWriter(out); - try { - Properties options = new Properties(); - options.setProperty("mode", "save"); - options.put("pool", pool); - - saveToWriter(writer, options); - } finally { - writer.flush(); - } - - for (OverlayModel overlayModel : overlayModels.values()) { - try { - overlayModel.onAfterSave(); - } catch (Exception e) { - logger.warn("Error signaling overlay model after saving", e); - } - } - } - - protected void saveToWriter(Writer writer, 
Properties options) throws IOException { - writer.write(GridworksServlet.getVersion()); writer.write('\n'); - - writer.write("columnModel=\n"); columnModel.save(writer, options); - writer.write("history=\n"); history.save(writer, options); - - for (String modelName : overlayModels.keySet()) { - writer.write("overlayModel:"); - writer.write(modelName); - writer.write("="); - - try { - JSONWriter jsonWriter = new JSONWriter(writer); - - overlayModels.get(modelName).write(jsonWriter, options); - } catch (JSONException e) { - e.printStackTrace(); - } - writer.write('\n'); - } - - writer.write("rowCount="); writer.write(Integer.toString(rows.size())); writer.write('\n'); - for (Row row : rows) { - row.save(writer, options); writer.write('\n'); - } - } - - static public Project loadFromReader( - LineNumberReader reader, - long id, - Pool pool - ) throws Exception { - long start = System.currentTimeMillis(); - - /* String version = */ reader.readLine(); - - Project project = new Project(id); - int maxCellCount = 0; - - String line; - while ((line = reader.readLine()) != null) { - int equal = line.indexOf('='); - String field = line.substring(0, equal); - String value = line.substring(equal + 1); - - // backward compatibility - if ("protograph".equals(field)) { - field = "overlayModel:freebaseProtograph"; - } - - if ("columnModel".equals(field)) { - project.columnModel.load(reader); - } else if ("history".equals(field)) { - project.history.load(project, reader); - } else if ("rowCount".equals(field)) { - int count = Integer.parseInt(value); - - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - Row row = Row.load(line, pool); - project.rows.add(row); - maxCellCount = Math.max(maxCellCount, row.cells.size()); - } - } - } else if (field.startsWith("overlayModel:")) { - String modelName = field.substring("overlayModel:".length()); - if (s_overlayModelClasses.containsKey(modelName)) { - Class klass = s_overlayModelClasses.get(modelName); - - 
try { - Method loadMethod = klass.getMethod("load", Project.class, JSONObject.class); - JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(value); - - OverlayModel overlayModel = (OverlayModel) loadMethod.invoke(null, project, obj); - - project.overlayModels.put(modelName, overlayModel); - } catch (Exception e) { - logger.error("Failed to load overlay model " + modelName); - } - } - } - } - - project.columnModel.setMaxCellIndex(maxCellCount - 1); - - logger.info( - "Loaded project {} from disk in {} sec(s)",id,Long.toString((System.currentTimeMillis() - start) / 1000) - ); - - project.update(); - - return project; - } - - public void update() { - columnModel.update(); - recordModel.update(this); - } - - - //wrapper of processManager variable to allow unit testing - //TODO make the processManager variable private, and force all calls through this method - public ProcessManager getProcessManager() { - return this.processManager; - } -} diff --git a/main/src/com/google/gridworks/model/Recon.java b/main/src/com/google/gridworks/model/Recon.java deleted file mode 100644 index 6834a6ded..000000000 --- a/main/src/com/google/gridworks/model/Recon.java +++ /dev/null @@ -1,376 +0,0 @@ -package com.google.gridworks.model; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.codehaus.jackson.JsonFactory; -import org.codehaus.jackson.JsonParser; -import org.codehaus.jackson.JsonToken; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.expr.HasFields; -import com.google.gridworks.util.Pool; - -public class Recon implements HasFields, Jsonizable { - - static public enum Judgment { - None, - Matched, - New - } - - static public String judgmentToString(Judgment judgment) { - if (judgment == Judgment.Matched) { - return "matched"; - } else if (judgment == Judgment.New) { - return "new"; - } else { 
- return "none"; - } - } - - static public Judgment stringToJudgment(String s) { - if ("matched".equals(s)) { - return Judgment.Matched; - } else if ("new".equals(s)) { - return Judgment.New; - } else { - return Judgment.None; - } - } - - static final public int Feature_typeMatch = 0; - static final public int Feature_nameMatch = 1; - static final public int Feature_nameLevenshtein = 2; - static final public int Feature_nameWordDistance = 3; - static final public int Feature_qaResult = 4; - static final public int Feature_max = 5; - - static final protected Map s_featureMap = new HashMap(); - static { - s_featureMap.put("typeMatch", Feature_typeMatch); - s_featureMap.put("nameMatch", Feature_nameMatch); - s_featureMap.put("nameLevenshtein", Feature_nameLevenshtein); - s_featureMap.put("nameWordDistance", Feature_nameWordDistance); - s_featureMap.put("qaResult", Feature_qaResult); - } - - final public long id; - public String service = "unknown"; - public String identifierSpace = null; - public String schemaSpace = null; - - public Object[] features = new Object[Feature_max]; - public List candidates; - - public Judgment judgment = Judgment.None; - public String judgmentAction = "unknown"; - public long judgmentHistoryEntry; - public int judgmentBatchSize = 0; - - public ReconCandidate match = null; - public int matchRank = -1; - - static public Recon makeFreebaseRecon(long judgmentHistoryEntry) { - return new Recon( - judgmentHistoryEntry, - "http://rdf.freebase.com/ns/type.object.id", - "http://rdf.freebase.com/ns/type.object.id"); - } - - public Recon(long judgmentHistoryEntry, String identifierSpace, String schemaSpace) { - id = System.currentTimeMillis() * 1000000 + Math.round(Math.random() * 1000000); - this.judgmentHistoryEntry = judgmentHistoryEntry; - this.identifierSpace = identifierSpace; - this.schemaSpace = schemaSpace; - } - - protected Recon(long id, long judgmentHistoryEntry) { - this.id = id; - this.judgmentHistoryEntry = judgmentHistoryEntry; - } - 
- public Recon dup() { - Recon r = new Recon(id, judgmentHistoryEntry); - r.identifierSpace = identifierSpace; - r.schemaSpace = schemaSpace; - - copyTo(r); - - return r; - } - - public Recon dup(long judgmentHistoryEntry) { - Recon r = new Recon(judgmentHistoryEntry, identifierSpace, schemaSpace); - - copyTo(r); - - return r; - } - - protected void copyTo(Recon r) { - System.arraycopy(features, 0, r.features, 0, features.length); - - if (candidates != null) { - r.candidates = new ArrayList(candidates); - } - - r.service = service; - - r.judgment = judgment; - - r.judgmentAction = judgmentAction; - r.judgmentBatchSize = judgmentBatchSize; - - r.match = match; - r.matchRank = matchRank; - } - - public void addCandidate(ReconCandidate candidate) { - if (candidates == null) { - candidates = new ArrayList(3); - } - candidates.add(candidate); - } - - public ReconCandidate getBestCandidate() { - if (candidates != null && candidates.size() > 0) { - return candidates.get(0); - } - return null; - } - - public Object getFeature(int feature) { - return feature < features.length ? features[feature] : null; - } - - public void setFeature(int feature, Object v) { - if (feature >= features.length) { - if (feature >= Feature_max) { - return; - } - - // We deserialized this object from an older version of the class - // that had fewer features, so we can just try to extend it - - Object[] newFeatures = new Object[Feature_max]; - - System.arraycopy(features, 0, newFeatures, 0, features.length); - - features = newFeatures; - } - - features[feature] = v; - } - - public Object getField(String name, Properties bindings) { - if ("id".equals(name)) { - return id; - } else if ("best".equals(name)) { - return candidates != null && candidates.size() > 0 ? 
candidates.get(0) : null; - } else if ("candidates".equals(name)) { - return candidates; - } else if ("judgment".equals(name) || "judgement".equals(name)) { - return judgmentToString(); - } else if ("judgmentAction".equals(name) || "judgementAction".equals(name)) { - return judgmentAction; - } else if ("judgmentHistoryEntry".equals(name) || "judgementHistoryEntry".equals(name)) { - return judgmentHistoryEntry; - } else if ("judgmentBatchSize".equals(name) || "judgementBatchSize".equals(name)) { - return judgmentBatchSize; - } else if ("matched".equals(name)) { - return judgment == Judgment.Matched; - } else if ("new".equals(name)) { - return judgment == Judgment.New; - } else if ("match".equals(name)) { - return match; - } else if ("matchRank".equals(name)) { - return matchRank; - } else if ("features".equals(name)) { - return new Features(); - } else if ("service".equals(name)) { - return service; - } else if ("identifierSpace".equals(name)) { - return identifierSpace; - } else if ("schemaSpace".equals(name)) { - return schemaSpace; - } - return null; - } - - public boolean fieldAlsoHasFields(String name) { - return "match".equals(name) || "best".equals(name); - } - - protected String judgmentToString() { - return judgmentToString(judgment); - } - - public class Features implements HasFields { - public Object getField(String name, Properties bindings) { - int index = s_featureMap.get(name); - return index < features.length ? 
features[index] : null; - } - - public boolean fieldAlsoHasFields(String name) { - return false; - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - boolean saveMode = "save".equals(options.getProperty("mode")); - - writer.object(); - writer.key("id"); writer.value(id); - if (saveMode) { - writer.key("judgmentHistoryEntry"); writer.value(judgmentHistoryEntry); - } - - writer.key("service"); writer.value(service); - writer.key("identifierSpace"); writer.value(identifierSpace); - writer.key("schemaSpace"); writer.value(schemaSpace); - - writer.key("j"); writer.value(judgmentToString()); - if (match != null) { - writer.key("m"); - writer.value(match.id); - } - if (match == null || saveMode) { - writer.key("c"); writer.array(); - if (candidates != null) { - for (ReconCandidate c : candidates) { - writer.value(c.id); - } - } - writer.endArray(); - } - - if (saveMode) { - writer.key("f"); - writer.array(); - for (Object o : features) { - writer.value(o); - } - writer.endArray(); - - writer.key("judgmentAction"); writer.value(judgmentAction); - writer.key("judgmentBatchSize"); writer.value(judgmentBatchSize); - - if (match != null) { - writer.key("matchRank"); writer.value(matchRank); - } - } - - writer.endObject(); - } - - static public Recon loadStreaming(String s, Pool pool) throws Exception { - JsonFactory jsonFactory = new JsonFactory(); - JsonParser jp = jsonFactory.createJsonParser(s); - - if (jp.nextToken() != JsonToken.START_OBJECT) { - return null; - } - return loadStreaming(jp, pool); - } - - static public Recon loadStreaming(JsonParser jp, Pool pool) throws Exception { - JsonToken t = jp.getCurrentToken(); - if (t == JsonToken.VALUE_NULL || t != JsonToken.START_OBJECT) { - return null; - } - - Recon recon = null; - long id = -1; - long judgmentHistoryEntry = -1; - - while (jp.nextToken() != JsonToken.END_OBJECT) { - String fieldName = jp.getCurrentName(); - jp.nextToken(); - - if ("id".equals(fieldName)) { - id = 
jp.getLongValue(); - } else if ("judgmentHistoryEntry".equals(fieldName)) { - judgmentHistoryEntry = jp.getLongValue(); - if (recon != null) { - recon.judgmentHistoryEntry = judgmentHistoryEntry; - } - } else { - if (recon == null) { - recon = new Recon(id, judgmentHistoryEntry); - } - - if ("j".equals(fieldName)) { - recon.judgment = stringToJudgment(jp.getText()); - } else if ("m".equals(fieldName)) { - if (jp.getCurrentToken() == JsonToken.VALUE_STRING) { - String candidateID = jp.getText(); - - recon.match = pool.getReconCandidate(candidateID); - } else { - // legacy - recon.match = ReconCandidate.loadStreaming(jp); - } - } else if ("f".equals(fieldName)) { - if (jp.getCurrentToken() != JsonToken.START_ARRAY) { - return null; - } - - int feature = 0; - while (jp.nextToken() != JsonToken.END_ARRAY) { - if (feature < recon.features.length) { - JsonToken token = jp.getCurrentToken(); - if (token == JsonToken.VALUE_STRING) { - recon.features[feature++] = jp.getText(); - } else if (token == JsonToken.VALUE_NUMBER_INT) { - recon.features[feature++] = jp.getLongValue(); - } else if (token == JsonToken.VALUE_NUMBER_FLOAT) { - recon.features[feature++] = jp.getDoubleValue(); - } else if (token == JsonToken.VALUE_FALSE) { - recon.features[feature++] = false; - } else if (token == JsonToken.VALUE_TRUE) { - recon.features[feature++] = true; - } - } - } - } else if ("c".equals(fieldName)) { - if (jp.getCurrentToken() != JsonToken.START_ARRAY) { - return null; - } - - while (jp.nextToken() != JsonToken.END_ARRAY) { - if (jp.getCurrentToken() == JsonToken.VALUE_STRING) { - String candidateID = jp.getText(); - - recon.addCandidate(pool.getReconCandidate(candidateID)); - } else { - // legacy - recon.addCandidate(ReconCandidate.loadStreaming(jp)); - } - } - } else if ("service".equals(fieldName)) { - recon.service = jp.getText(); - } else if ("identifierSpace".equals(fieldName)) { - recon.identifierSpace = jp.getText(); - } else if ("schemaSpace".equals(fieldName)) { - 
recon.schemaSpace = jp.getText(); - } else if ("judgmentAction".equals(fieldName)) { - recon.judgmentAction = jp.getText(); - } else if ("judgmentBatchSize".equals(fieldName)) { - recon.judgmentBatchSize = jp.getIntValue(); - } else if ("matchRank".equals(fieldName)) { - recon.matchRank = jp.getIntValue(); - } - } - } - - return recon; - } -} diff --git a/main/src/com/google/gridworks/model/ReconCandidate.java b/main/src/com/google/gridworks/model/ReconCandidate.java deleted file mode 100644 index 3ff38e9dd..000000000 --- a/main/src/com/google/gridworks/model/ReconCandidate.java +++ /dev/null @@ -1,124 +0,0 @@ -package com.google.gridworks.model; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.codehaus.jackson.JsonFactory; -import org.codehaus.jackson.JsonParser; -import org.codehaus.jackson.JsonToken; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.expr.HasFields; - -public class ReconCandidate implements HasFields, Jsonizable { - final public String id; - final public String name; - final public String[] types; - final public double score; - - public ReconCandidate(String topicID, String topicName, String[] typeIDs, double score) { - this.id = topicID; - this.name = topicName; - this.types = typeIDs; - this.score = score; - } - - public Object getField(String name, Properties bindings) { - if ("id".equals(name)) { - return id; - } else if ("name".equals(name)) { - return this.name; - } else if ("type".equals(name)) { - return types; - } else if ("score".equals(name)) { - return score; - } - return null; - } - - public boolean fieldAlsoHasFields(String name) { - return false; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("id"); writer.value(id); - writer.key("name"); writer.value(name); - writer.key("score"); writer.value(score); - - /* if 
(!options.containsKey("reconCandidateOmitTypes")) */ { - writer.key("types"); writer.array(); - for (String typeID : types) { - writer.value(typeID); - } - writer.endArray(); - } - - writer.endObject(); - } - - static public ReconCandidate loadStreaming(String s) throws Exception { - JsonFactory jsonFactory = new JsonFactory(); - JsonParser jp = jsonFactory.createJsonParser(s); - - if (jp.nextToken() != JsonToken.START_OBJECT) { - return null; - } - return loadStreaming(jp); - } - - static public ReconCandidate loadStreaming(JsonParser jp) throws Exception { - JsonToken t = jp.getCurrentToken(); - if (t == JsonToken.VALUE_NULL || t != JsonToken.START_OBJECT) { - return null; - } - - String id = null; - String name = null; - List types = null; - double score = 0; - - while (jp.nextToken() != JsonToken.END_OBJECT) { - String fieldName = jp.getCurrentName(); - jp.nextToken(); - - if ("id".equals(fieldName)) { - id = jp.getText(); - } else if ("name".equals(fieldName)) { - name = jp.getText(); - } else if ("score".equals(fieldName)) { - score = jp.getDoubleValue(); - } else if ("types".equals(fieldName)) { - if (jp.getCurrentToken() != JsonToken.START_ARRAY) { - return null; - } - - types = new ArrayList(); - - while (jp.nextToken() != JsonToken.END_ARRAY) { - types.add(jp.getText()); - } - } - } - - String[] typesA; - if (types != null) { - typesA = new String[types.size()]; - types.toArray(typesA); - } else { - typesA = new String[0]; - } - - return new ReconCandidate( - id, - name, - typesA, - score - ); - } -} \ No newline at end of file diff --git a/main/src/com/google/gridworks/model/ReconStats.java b/main/src/com/google/gridworks/model/ReconStats.java deleted file mode 100644 index e37a3a732..000000000 --- a/main/src/com/google/gridworks/model/ReconStats.java +++ /dev/null @@ -1,74 +0,0 @@ -package com.google.gridworks.model; - -import java.io.Writer; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import 
org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Recon.Judgment; - -public class ReconStats implements Jsonizable { - static public ReconStats load(JSONObject obj) throws Exception { - return new ReconStats( - obj.getInt("nonBlanks"), - obj.getInt("newTopics"), - obj.getInt("matchedTopics") - ); - } - - final public int nonBlanks; - final public int newTopics; - final public int matchedTopics; - - public ReconStats(int nonBlanks, int newTopics, int matchedTopics) { - this.nonBlanks = nonBlanks; - this.newTopics = newTopics; - this.matchedTopics = matchedTopics; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("nonBlanks"); writer.value(nonBlanks); - writer.key("newTopics"); writer.value(newTopics); - writer.key("matchedTopics"); writer.value(matchedTopics); - writer.endObject(); - } - - static public ReconStats create(Project project, int cellIndex) { - int nonBlanks = 0; - int newTopics = 0; - int matchedTopics = 0; - - for (Row row : project.rows) { - Cell cell = row.getCell(cellIndex); - if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) { - nonBlanks++; - - if (cell.recon != null) { - if (cell.recon.judgment == Judgment.New) { - newTopics++; - } else if (cell.recon.judgment == Judgment.Matched) { - matchedTopics++; - } - } - } - } - - return new ReconStats(nonBlanks, newTopics, matchedTopics); - } - - public void save(Writer writer) { - JSONWriter jsonWriter = new JSONWriter(writer); - try { - write(jsonWriter, new Properties()); - } catch (JSONException e) { - e.printStackTrace(); - } - } -} diff --git a/main/src/com/google/gridworks/model/Record.java b/main/src/com/google/gridworks/model/Record.java deleted file mode 100644 index fc3bcc279..000000000 --- a/main/src/com/google/gridworks/model/Record.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.google.gridworks.model; - 
-public class Record { - final public int fromRowIndex; - final public int toRowIndex; - final public int recordIndex; - - public Record( - int fromRowIndex, - int toRowIndex, - int recordIndex - ) { - this.fromRowIndex = fromRowIndex; - this.toRowIndex = toRowIndex; - this.recordIndex = recordIndex; - } -} diff --git a/main/src/com/google/gridworks/model/RecordModel.java b/main/src/com/google/gridworks/model/RecordModel.java deleted file mode 100644 index faa3ce877..000000000 --- a/main/src/com/google/gridworks/model/RecordModel.java +++ /dev/null @@ -1,222 +0,0 @@ -package com.google.gridworks.model; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.expr.ExpressionUtils; - -public class RecordModel implements Jsonizable { - final static public class CellDependency { - final public int rowIndex; - final public int cellIndex; - - public CellDependency(int rowIndex, int cellIndex) { - this.rowIndex = rowIndex; - this.cellIndex = cellIndex; - } - } - - final static public class RowDependency { - public int recordIndex; - public CellDependency[] cellDependencies; - public List contextRows; - } - - protected List _rowDependencies; - protected List _records; - - public RowDependency getRowDependency(int rowIndex) { - return _rowDependencies != null && rowIndex >= 0 && rowIndex < _rowDependencies.size() ? - _rowDependencies.get(rowIndex) : null; - } - - public int getRecordCount() { - return _records.size(); - } - - public Record getRecord(int recordIndex) { - return _records != null && recordIndex >= 0 && recordIndex < _records.size() ? 
- _records.get(recordIndex) : null; - } - - public Record getRecordOfRow(int rowIndex) { - RowDependency rd = getRowDependency(rowIndex); - if (rd != null) { - if (rd.recordIndex < 0) { - rd = getRowDependency(rd.contextRows.get(0)); - } - return getRecord(rd.recordIndex); - } - return null; - } - - synchronized public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("hasRecords"); writer.value(_records.size() < _rowDependencies.size()); - writer.endObject(); - } - - static protected class KeyedGroup { - int[] cellIndices; - int keyCellIndex; - } - - synchronized public void update(Project project) { - synchronized (project) { - List rows = project.rows; - int rowCount = rows.size(); - - ColumnModel columnModel = project.columnModel; - List keyedGroups = computeKeyedGroups(columnModel); - int groupCount = keyedGroups.size(); - - int[] lastNonBlankRowsByGroup = new int[keyedGroups.size()]; - for (int i = 0; i < lastNonBlankRowsByGroup.length; i++) { - lastNonBlankRowsByGroup[i] = -1; - } - - _rowDependencies = new ArrayList(rowCount); - - int recordIndex = 0; - for (int r = 0; r < rowCount; r++) { - Row row = rows.get(r); - RowDependency rowDependency = new RowDependency(); - - for (int g = 0; g < groupCount; g++) { - KeyedGroup group = keyedGroups.get(g); - - if (!ExpressionUtils.isNonBlankData(row.getCellValue(group.keyCellIndex))) { - int contextRowIndex = lastNonBlankRowsByGroup[g]; - if (contextRowIndex >= 0) { - for (int dependentCellIndex : group.cellIndices) { - if (ExpressionUtils.isNonBlankData(row.getCellValue(dependentCellIndex))) { - setRowDependency( - project, - rowDependency, - dependentCellIndex, - contextRowIndex, - group.keyCellIndex - ); - } - } - } - } else { - lastNonBlankRowsByGroup[g] = r; - } - } - - if (rowDependency.cellDependencies != null && rowDependency.cellDependencies.length > 0) { - rowDependency.recordIndex = -1; - rowDependency.contextRows = new ArrayList(); - for 
(CellDependency cd : rowDependency.cellDependencies) { - if (cd != null) { - rowDependency.contextRows.add(cd.rowIndex); - } - } - Collections.sort(rowDependency.contextRows); - } else { - rowDependency.recordIndex = recordIndex++; - } - - _rowDependencies.add(rowDependency); - } - - _records = new ArrayList(recordIndex); - if (recordIndex > 0) { - recordIndex = 0; - - int recordRowIndex = 0; - for (int r = 1; r < rowCount; r++) { - RowDependency rd = _rowDependencies.get(r); - if (rd.recordIndex >= 0) { - _records.add(new Record(recordRowIndex, r, recordIndex++)); - - recordIndex = rd.recordIndex; - recordRowIndex = r; - } - } - - _records.add(new Record(recordRowIndex, rowCount, recordIndex++)); - } - } - } - - protected List computeKeyedGroups(ColumnModel columnModel) { - List keyedGroups = new ArrayList(); - - addRootKeyedGroup(columnModel, keyedGroups); - - for (ColumnGroup group : columnModel.columnGroups) { - if (group.keyColumnIndex >= 0) { - KeyedGroup keyedGroup = new KeyedGroup(); - keyedGroup.keyCellIndex = columnModel.columns.get(group.keyColumnIndex).getCellIndex(); - keyedGroup.cellIndices = new int[group.columnSpan - 1]; - - int c = 0; - for (int i = 0; i < group.columnSpan; i++) { - int columnIndex = group.startColumnIndex + i; - if (columnIndex != group.keyColumnIndex && columnIndex < columnModel.columns.size()) { - int cellIndex = columnModel.columns.get(columnIndex).getCellIndex(); - keyedGroup.cellIndices[c++] = cellIndex; - } - } - - keyedGroups.add(keyedGroup); - } - } - - Collections.sort(keyedGroups, new Comparator() { - public int compare(KeyedGroup o1, KeyedGroup o2) { - return o2.cellIndices.length - o1.cellIndices.length; // larger groups first - } - }); - - return keyedGroups; - } - - protected void addRootKeyedGroup(ColumnModel columnModel, List keyedGroups) { - int count = columnModel.getMaxCellIndex() + 1; - if (count > 0 && columnModel.getKeyColumnIndex() < columnModel.columns.size()) { - KeyedGroup rootKeyedGroup = new 
KeyedGroup(); - - rootKeyedGroup.cellIndices = new int[count - 1]; - rootKeyedGroup.keyCellIndex = columnModel.columns.get(columnModel.getKeyColumnIndex()).getCellIndex(); - - for (int i = 0; i < count; i++) { - if (i < rootKeyedGroup.keyCellIndex) { - rootKeyedGroup.cellIndices[i] = i; - } else if (i > rootKeyedGroup.keyCellIndex) { - rootKeyedGroup.cellIndices[i - 1] = i; - } - } - keyedGroups.add(rootKeyedGroup); - } - } - - protected void setRowDependency( - Project project, - RowDependency rowDependency, - int cellIndex, - int contextRowIndex, - int contextCellIndex - ) { - if (rowDependency.cellDependencies == null) { - int count = project.columnModel.getMaxCellIndex() + 1; - - rowDependency.cellDependencies = new CellDependency[count]; - } - - rowDependency.cellDependencies[cellIndex] = - new CellDependency(contextRowIndex, contextCellIndex); - } - -} diff --git a/main/src/com/google/gridworks/model/Row.java b/main/src/com/google/gridworks/model/Row.java deleted file mode 100644 index 882a01013..000000000 --- a/main/src/com/google/gridworks/model/Row.java +++ /dev/null @@ -1,201 +0,0 @@ -package com.google.gridworks.model; - -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; -import java.util.Map.Entry; - -import org.codehaus.jackson.JsonFactory; -import org.codehaus.jackson.JsonParser; -import org.codehaus.jackson.JsonToken; -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.expr.CellTuple; -import com.google.gridworks.expr.HasFields; -import com.google.gridworks.util.Pool; - -public class Row implements HasFields, Jsonizable { - public boolean flagged; - public boolean starred; - final public List cells; - - private static final String FLAGGED = "flagged"; - private static final String STARRED = "starred"; - - public Row(int cellCount) { - cells = new ArrayList(cellCount); - } - - protected Row(List cells, boolean 
flagged, boolean starred) { - this.cells = cells; - this.flagged = flagged; - this.starred = starred; - } - - public Row dup() { - Row row = new Row(cells.size()); - row.flagged = flagged; - row.starred = starred; - row.cells.addAll(cells); - return row; - } - - public Object getField(String name, Properties bindings) { - if (FLAGGED.equals(name)) { - return flagged; - } else if (STARRED.equals(name)) { - return starred; - } - return null; - } - - public boolean fieldAlsoHasFields(String name) { - return "cells".equals(name) || "record".equals(name); - } - - public boolean isEmpty() { - for (Cell cell : cells) { - if (cell != null && cell.value != null && !isValueBlank(cell.value)) { - return false; - } - } - return true; - } - - public Cell getCell(int cellIndex) { - if (cellIndex >= 0 && cellIndex < cells.size()) { - return cells.get(cellIndex); - } else { - return null; - } - } - - public Object getCellValue(int cellIndex) { - if (cellIndex >= 0 && cellIndex < cells.size()) { - Cell cell = cells.get(cellIndex); - if (cell != null) { - return cell.value; - } - } - return null; - } - - public boolean isCellBlank(int cellIndex) { - return isValueBlank(getCellValue(cellIndex)); - } - - protected boolean isValueBlank(Object value) { - return value == null || (value instanceof String && ((String) value).trim().length() == 0); - } - - public void setCell(int cellIndex, Cell cell) { - if (cellIndex < cells.size()) { - cells.set(cellIndex, cell); - } else { - while (cellIndex > cells.size()) { - cells.add(null); - } - cells.add(cell); - } - } - - public CellTuple getCellTuple(Project project) { - return new CellTuple(project, this); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key(FLAGGED); writer.value(flagged); - writer.key(STARRED); writer.value(starred); - - writer.key("cells"); writer.array(); - for (Cell cell : cells) { - if (cell != null) { - cell.write(writer, options); - } else { - 
writer.value(null); - } - } - writer.endArray(); - - if (!"save".equals(options.getProperty("mode"))) { - if (options.containsKey("rowIndex")) { - int rowIndex = (Integer) options.get("rowIndex"); - writer.key("i"); writer.value(rowIndex); - - if (options.containsKey("recordIndex")) { - int recordIndex = (Integer) options.get("recordIndex"); - - writer.key("j"); writer.value(recordIndex); - } - } - - if (options.containsKey("extra")) { - Properties extra = (Properties) options.get("extra"); - if (extra != null) { - for (Entry e : extra.entrySet()) { - writer.key((String) e.getKey()); - writer.value(e.getValue()); - } - } - } - } - - writer.endObject(); - } - - public void save(Writer writer, Properties options) { - JSONWriter jsonWriter = new JSONWriter(writer); - try { - write(jsonWriter, options); - } catch (JSONException e) { - e.printStackTrace(); - } - } - - static public Row load(String s, Pool pool) throws Exception { - return s.length() == 0 ? null : - loadStreaming(s, pool); - } - - static public Row loadStreaming(String s, Pool pool) throws Exception { - JsonFactory jsonFactory = new JsonFactory(); - JsonParser jp = jsonFactory.createJsonParser(s); - - if (jp.nextToken() != JsonToken.START_OBJECT) { - return null; - } - - List cells = new ArrayList(); - boolean starred = false; - boolean flagged = false; - - while (jp.nextToken() != JsonToken.END_OBJECT) { - String fieldName = jp.getCurrentName(); - jp.nextToken(); - - if (STARRED.equals(fieldName)) { - starred = jp.getBooleanValue(); - } else if (FLAGGED.equals(fieldName)) { - flagged = jp.getBooleanValue(); - } else if ("cells".equals(fieldName)) { - if (jp.getCurrentToken() != JsonToken.START_ARRAY) { - return null; - } - - while (jp.nextToken() != JsonToken.END_ARRAY) { - Cell cell = Cell.loadStreaming(jp, pool); - - cells.add(cell); - } - } - } - - return (cells.size() > 0) ? 
new Row(cells, flagged, starred) : null; - } -} diff --git a/main/src/com/google/gridworks/model/changes/CellAtRow.java b/main/src/com/google/gridworks/model/changes/CellAtRow.java deleted file mode 100644 index e3144de45..000000000 --- a/main/src/com/google/gridworks/model/changes/CellAtRow.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.util.Pool; - -public class CellAtRow { - - final public int row; - final public Cell cell; - - public CellAtRow(int row, Cell cell) { - this.row = row; - this.cell = cell; - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write(Integer.toString(row)); - writer.write(';'); - if (cell != null) { - cell.save(writer, options); - } - } - - static public CellAtRow load(String s, Pool pool) throws Exception { - int semicolon = s.indexOf(';'); - int row = Integer.parseInt(s.substring(0, semicolon)); - Cell cell = semicolon < s.length() - 1 ? 
Cell.loadStreaming(s.substring(semicolon + 1), pool) : null; - - return new CellAtRow(row, cell); - } -} diff --git a/main/src/com/google/gridworks/model/changes/CellChange.java b/main/src/com/google/gridworks/model/changes/CellChange.java deleted file mode 100644 index 2d790eb08..000000000 --- a/main/src/com/google/gridworks/model/changes/CellChange.java +++ /dev/null @@ -1,82 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.Pool; - -public class CellChange implements Change { - final public int row; - final public int cellIndex; - final public Cell oldCell; - final public Cell newCell; - - public CellChange(int row, int cellIndex, Cell oldCell, Cell newCell) { - this.row = row; - this.cellIndex = cellIndex; - this.oldCell = oldCell; - this.newCell = newCell; - } - - public void apply(Project project) { - project.rows.get(row).setCell(cellIndex, newCell); - - project.columnModel.getColumnByCellIndex(cellIndex).clearPrecomputes(); - } - - public void revert(Project project) { - project.rows.get(row).setCell(cellIndex, oldCell); - - project.columnModel.getColumnByCellIndex(cellIndex).clearPrecomputes(); - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("row="); writer.write(Integer.toString(row)); writer.write('\n'); - writer.write("cell="); writer.write(Integer.toString(cellIndex)); writer.write('\n'); - - writer.write("old="); - if (oldCell != null) { - oldCell.save(writer, options); // one liner - } - writer.write('\n'); - - writer.write("new="); - if (newCell != null) { - newCell.save(writer, options); // one liner - } - writer.write('\n'); - - writer.write("/ec/\n"); // end of change marker - } - - static public CellChange 
load(LineNumberReader reader, Pool pool) throws Exception { - int row = -1; - int cellIndex = -1; - Cell oldCell = null; - Cell newCell = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if ("row".equals(field)) { - row = Integer.parseInt(value); - } else if ("cell".equals(field)) { - cellIndex = Integer.parseInt(value); - } else if ("new".equals(field) && value.length() > 0) { - newCell = Cell.loadStreaming(value, pool); - } else if ("old".equals(field) && value.length() > 0) { - oldCell = Cell.loadStreaming(value, pool); - } - } - - return new CellChange(row, cellIndex, oldCell, newCell); - } -} diff --git a/main/src/com/google/gridworks/model/changes/ColumnAdditionChange.java b/main/src/com/google/gridworks/model/changes/ColumnAdditionChange.java deleted file mode 100644 index a6cd96bcd..000000000 --- a/main/src/com/google/gridworks/model/changes/ColumnAdditionChange.java +++ /dev/null @@ -1,109 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.Pool; - -public class ColumnAdditionChange extends ColumnChange { - final protected String _columnName; - final protected int _columnIndex; - final protected CellAtRow[] _newCells; - protected int _newCellIndex = -1; - - public ColumnAdditionChange(String columnName, int columnIndex, List newCells) { - _columnName = columnName; - _columnIndex = columnIndex; - _newCells = new CellAtRow[newCells.size()]; - newCells.toArray(_newCells); - } - - public void apply(Project 
project) { - synchronized (project) { - if (_newCellIndex < 0) { - _newCellIndex = project.columnModel.allocateNewCellIndex(); - } - - Column column = new Column(_newCellIndex, _columnName); - - project.columnModel.columns.add(_columnIndex, column); - try { - for (CellAtRow cell : _newCells) { - project.rows.get(cell.row).setCell(_newCellIndex, cell.cell); - } - } catch (Exception e) { - e.printStackTrace(); - } - project.update(); - } - } - - public void revert(Project project) { - synchronized (project) { - for (CellAtRow cell : _newCells) { - Row row = project.rows.get(cell.row); - row.setCell(_newCellIndex, null); - } - - project.columnModel.columns.remove(_columnIndex); - - project.update(); - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("columnName="); writer.write(_columnName); writer.write('\n'); - writer.write("columnIndex="); writer.write(Integer.toString(_columnIndex)); writer.write('\n'); - writer.write("newCellIndex="); writer.write(Integer.toString(_newCellIndex)); writer.write('\n'); - writer.write("newCellCount="); writer.write(Integer.toString(_newCells.length)); writer.write('\n'); - for (CellAtRow c : _newCells) { - c.save(writer, options); - writer.write('\n'); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - String columnName = null; - int columnIndex = -1; - int newCellIndex = -1; - List newCells = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - - if ("columnName".equals(field)) { - columnName = line.substring(equal + 1); - } else if ("columnIndex".equals(field)) { - columnIndex = Integer.parseInt(line.substring(equal + 1)); - } else if ("newCellIndex".equals(field)) { - newCellIndex = Integer.parseInt(line.substring(equal + 1)); - } else if 
("newCellCount".equals(field)) { - int newCellCount = Integer.parseInt(line.substring(equal + 1)); - - newCells = new ArrayList(newCellCount); - for (int i = 0; i < newCellCount; i++) { - line = reader.readLine(); - if (line != null) { - newCells.add(CellAtRow.load(line, pool)); - } - } - } - } - - ColumnAdditionChange change = new ColumnAdditionChange(columnName, columnIndex, newCells); - change._newCellIndex = newCellIndex; - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/ColumnChange.java b/main/src/com/google/gridworks/model/changes/ColumnChange.java deleted file mode 100644 index f7f09ce5b..000000000 --- a/main/src/com/google/gridworks/model/changes/ColumnChange.java +++ /dev/null @@ -1,6 +0,0 @@ -package com.google.gridworks.model.changes; - -import com.google.gridworks.history.Change; - -abstract public class ColumnChange implements Change { -} diff --git a/main/src/com/google/gridworks/model/changes/ColumnMoveChange.java b/main/src/com/google/gridworks/model/changes/ColumnMoveChange.java deleted file mode 100644 index 789d92c51..000000000 --- a/main/src/com/google/gridworks/model/changes/ColumnMoveChange.java +++ /dev/null @@ -1,75 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.Pool; - -public class ColumnMoveChange extends ColumnChange { - final protected String _columnName; - final protected int _newColumnIndex; - protected int _oldColumnIndex; - - public ColumnMoveChange(String columnName, int index) { - _columnName = columnName; - _newColumnIndex = index; - } - - public void apply(Project project) { - synchronized (project) { - _oldColumnIndex = project.columnModel.getColumnIndexByName(_columnName); - - Column column = 
project.columnModel.columns.remove(_oldColumnIndex); - project.columnModel.columns.add(_newColumnIndex, column); - - project.update(); - } - } - - public void revert(Project project) { - synchronized (project) { - Column column = project.columnModel.columns.remove(_newColumnIndex); - project.columnModel.columns.add(_oldColumnIndex, column); - - project.update(); - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("columnName="); writer.write(_columnName); writer.write('\n'); - writer.write("oldColumnIndex="); writer.write(Integer.toString(_oldColumnIndex)); writer.write('\n'); - writer.write("newColumnIndex="); writer.write(Integer.toString(_newColumnIndex)); writer.write('\n'); - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - String columnName = null; - int oldColumnIndex = -1; - int newColumnIndex = -1; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - - String value = line.substring(equal + 1); - if ("oldColumnIndex".equals(field)) { - oldColumnIndex = Integer.parseInt(value); - } else if ("newColumnIndex".equals(field)) { - newColumnIndex = Integer.parseInt(value); - } else if ("columnName".equals(field)) { - columnName = value; - } - } - - ColumnMoveChange change = new ColumnMoveChange(columnName, newColumnIndex); - change._oldColumnIndex = oldColumnIndex; - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/ColumnRemovalChange.java b/main/src/com/google/gridworks/model/changes/ColumnRemovalChange.java deleted file mode 100644 index 5c1dbf4f9..000000000 --- a/main/src/com/google/gridworks/model/changes/ColumnRemovalChange.java +++ /dev/null @@ -1,103 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; 
-import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.Pool; - -public class ColumnRemovalChange extends ColumnChange { - final protected int _oldColumnIndex; - protected Column _oldColumn; - protected CellAtRow[] _oldCells; - - public ColumnRemovalChange(int index) { - _oldColumnIndex = index; - } - - public void apply(Project project) { - synchronized (project) { - _oldColumn = project.columnModel.columns.remove(_oldColumnIndex); - _oldCells = new CellAtRow[project.rows.size()]; - - int cellIndex = _oldColumn.getCellIndex(); - for (int i = 0; i < _oldCells.length; i++) { - Row row = project.rows.get(i); - - Cell oldCell = null; - if (cellIndex < row.cells.size()) { - oldCell = row.cells.get(cellIndex); - } - _oldCells[i] = new CellAtRow(i, oldCell); - - row.setCell(cellIndex, null); - } - - project.update(); - } - } - - public void revert(Project project) { - synchronized (project) { - project.columnModel.columns.add(_oldColumnIndex, _oldColumn); - - int cellIndex = _oldColumn.getCellIndex(); - for (CellAtRow cell : _oldCells) { - project.rows.get(cell.row).cells.set(cellIndex, cell.cell); - } - - project.update(); - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("oldColumnIndex="); writer.write(Integer.toString(_oldColumnIndex)); writer.write('\n'); - writer.write("oldColumn="); _oldColumn.save(writer); writer.write('\n'); - writer.write("oldCellCount="); writer.write(Integer.toString(_oldCells.length)); writer.write('\n'); - for (CellAtRow c : _oldCells) { - c.save(writer, options); - writer.write('\n'); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - int oldColumnIndex = 
-1; - Column oldColumn = null; - CellAtRow[] oldCells = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - - if ("oldColumnIndex".equals(field)) { - oldColumnIndex = Integer.parseInt(line.substring(equal + 1)); - } else if ("oldColumn".equals(field)) { - oldColumn = Column.load(line.substring(equal + 1)); - } else if ("oldCellCount".equals(field)) { - int oldCellCount = Integer.parseInt(line.substring(equal + 1)); - - oldCells = new CellAtRow[oldCellCount]; - for (int i = 0; i < oldCellCount; i++) { - line = reader.readLine(); - if (line != null) { - oldCells[i] = CellAtRow.load(line, pool); - } - } - } - } - - ColumnRemovalChange change = new ColumnRemovalChange(oldColumnIndex); - change._oldColumn = oldColumn; - change._oldCells = oldCells; - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/ColumnRenameChange.java b/main/src/com/google/gridworks/model/changes/ColumnRenameChange.java deleted file mode 100644 index 94a2e5c00..000000000 --- a/main/src/com/google/gridworks/model/changes/ColumnRenameChange.java +++ /dev/null @@ -1,62 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.Pool; - -public class ColumnRenameChange extends ColumnChange { - final protected String _oldColumnName; - final protected String _newColumnName; - - public ColumnRenameChange(String oldColumnName, String newColumnName) { - _oldColumnName = oldColumnName; - _newColumnName = newColumnName; - } - - public void apply(Project project) { - synchronized (project) { - project.columnModel.getColumnByName(_oldColumnName).setName(_newColumnName); - project.columnModel.update(); - } - } - - public 
void revert(Project project) { - synchronized (project) { - project.columnModel.getColumnByName(_newColumnName).setName(_oldColumnName); - project.columnModel.update(); - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("oldColumnName="); writer.write(_oldColumnName); writer.write('\n'); - writer.write("newColumnName="); writer.write(_newColumnName); writer.write('\n'); - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - String oldColumnName = null; - String newColumnName = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if ("oldColumnName".equals(field)) { - oldColumnName = value; - } else if ("newColumnName".equals(field)) { - newColumnName = value; - } - } - - ColumnRenameChange change = new ColumnRenameChange(oldColumnName, newColumnName); - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/ColumnReorderChange.java b/main/src/com/google/gridworks/model/changes/ColumnReorderChange.java deleted file mode 100644 index e414c5db5..000000000 --- a/main/src/com/google/gridworks/model/changes/ColumnReorderChange.java +++ /dev/null @@ -1,114 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.Pool; - -public class ColumnReorderChange extends ColumnChange { - final protected List _columnNames; - protected List _oldColumns; - protected List _newColumns; - - public ColumnReorderChange(List 
columnNames) { - _columnNames = columnNames; - } - - public void apply(Project project) { - synchronized (project) { - if (_newColumns == null) { - _newColumns = new ArrayList(); - _oldColumns = new ArrayList(project.columnModel.columns); - - for (String n : _columnNames) { - Column column = project.columnModel.getColumnByName(n); - if (column != null) { - _newColumns.add(column); - } - } - } - - project.columnModel.columns.clear(); - project.columnModel.columns.addAll(_newColumns); - project.update(); - } - } - - public void revert(Project project) { - synchronized (project) { - project.columnModel.columns.clear(); - project.columnModel.columns.addAll(_oldColumns); - project.update(); - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n'); - for (String n : _columnNames) { - writer.write(n); - writer.write('\n'); - } - writer.write("oldColumnCount="); writer.write(Integer.toString(_oldColumns.size())); writer.write('\n'); - for (Column c : _oldColumns) { - c.save(writer); - writer.write('\n'); - } - writer.write("newColumnCount="); writer.write(Integer.toString(_newColumns.size())); writer.write('\n'); - for (Column c : _newColumns) { - c.save(writer); - writer.write('\n'); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - List columnNames = new ArrayList(); - List oldColumns = new ArrayList(); - List newColumns = new ArrayList(); - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - - if ("columnNameCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - columnNames.add(line); - } - } - } else if 
("oldColumnCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - oldColumns.add(Column.load(line)); - } - } - } else if ("newColumnCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - newColumns.add(Column.load(line)); - } - } - } - } - - ColumnReorderChange change = new ColumnReorderChange(columnNames); - change._oldColumns = oldColumns; - change._newColumns = newColumns; - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/ColumnSplitChange.java b/main/src/com/google/gridworks/model/changes/ColumnSplitChange.java deleted file mode 100644 index d7d8fd80d..000000000 --- a/main/src/com/google/gridworks/model/changes/ColumnSplitChange.java +++ /dev/null @@ -1,329 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Serializable; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONObject; -import org.json.JSONTokener; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.Pool; - -public class ColumnSplitChange implements Change { - final protected String _columnName; - - final protected List _columnNames; - final protected List _rowIndices; - final protected List> _tuples; - - final protected boolean _removeOriginalColumn; - - protected Column _column; - protected int _columnIndex; - - protected int _firstNewCellIndex = -1; - protected List _oldRows; - protected List _newRows; - - public ColumnSplitChange( - String columnName, - List columnNames, - List 
rowIndices, - List> tuples, - boolean removeOriginalColumn - ) { - _columnName = columnName; - - _columnNames = columnNames; - _rowIndices = rowIndices; - _tuples = tuples; - - _removeOriginalColumn = removeOriginalColumn; - } - - protected ColumnSplitChange( - String columnName, - List columnNames, - List rowIndices, - List> tuples, - boolean removeOriginalColumn, - - Column column, - int columnIndex, - - int firstNewCellIndex, - List oldRows, - List newRows - ) { - _columnName = columnName; - - _columnNames = columnNames; - _rowIndices = rowIndices; - _tuples = tuples; - - _removeOriginalColumn = removeOriginalColumn; - - _column = column; - _columnIndex = columnIndex; - - _firstNewCellIndex = firstNewCellIndex; - _oldRows = oldRows; - _newRows = newRows; - } - - public void apply(Project project) { - synchronized (project) { - if (_firstNewCellIndex < 0) { - _firstNewCellIndex = project.columnModel.allocateNewCellIndex(); - for (int i = 1; i < _columnNames.size(); i++) { - project.columnModel.allocateNewCellIndex(); - } - - _column = project.columnModel.getColumnByName(_columnName); - _columnIndex = project.columnModel.getColumnIndexByName(_columnName); - - _oldRows = new ArrayList(_rowIndices.size()); - _newRows = new ArrayList(_rowIndices.size()); - - int cellIndex = _column.getCellIndex(); - - for (int i = 0; i < _rowIndices.size(); i++) { - int r = _rowIndices.get(i); - List tuple = _tuples.get(i); - - Row oldRow = project.rows.get(r); - Row newRow = oldRow.dup(); - - _oldRows.add(oldRow); - _newRows.add(newRow); - - for (int c = 0; c < tuple.size(); c++) { - Serializable value = tuple.get(c); - if (value != null) { - newRow.setCell(_firstNewCellIndex + c, new Cell(value, null)); - } - } - - if (_removeOriginalColumn) { - newRow.setCell(cellIndex, null); - } - } - } - - for (int i = 0; i < _rowIndices.size(); i++) { - int r = _rowIndices.get(i); - Row newRow = _newRows.get(i); - - project.rows.set(r, newRow); - } - - for (int i = 0; i < _columnNames.size(); 
i++) { - String name = _columnNames.get(i); - int cellIndex = _firstNewCellIndex + i; - - Column column = new Column(cellIndex, name); - - project.columnModel.columns.add(_columnIndex + 1 + i, column); - } - - if (_removeOriginalColumn) { - project.columnModel.columns.remove(_columnIndex); - } - - project.update(); - } - } - - public void revert(Project project) { - synchronized (project) { - for (int i = 0; i < _rowIndices.size(); i++) { - int r = _rowIndices.get(i); - Row oldRow = _oldRows.get(i); - - project.rows.set(r, oldRow); - } - - if (_removeOriginalColumn) { - project.columnModel.columns.add(_columnIndex, _column); - } - - for (int i = 0; i < _columnNames.size(); i++) { - project.columnModel.columns.remove(_columnIndex + 1); - } - - project.update(); - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("columnName="); writer.write(_columnName); writer.write('\n'); - - writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n'); - for (String name : _columnNames) { - writer.write(name); writer.write('\n'); - } - writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n'); - for (Integer rowIndex : _rowIndices) { - writer.write(rowIndex.toString()); writer.write('\n'); - } - writer.write("tupleCount="); writer.write(Integer.toString(_tuples.size())); writer.write('\n'); - for (List tuple : _tuples) { - writer.write(Integer.toString(tuple.size())); writer.write('\n'); - - for (Serializable value : tuple) { - if (value == null) { - writer.write("null"); - } else if (value instanceof String) { - writer.write(JSONObject.quote((String) value)); - } else { - writer.write(value.toString()); - } - writer.write('\n'); - } - } - writer.write("removeOriginalColumn="); writer.write(Boolean.toString(_removeOriginalColumn)); writer.write('\n'); - - writer.write("column="); _column.save(writer); writer.write('\n'); - 
writer.write("columnIndex="); writer.write(Integer.toString(_columnIndex)); writer.write('\n'); - - writer.write("firstNewCellIndex="); writer.write(Integer.toString(_firstNewCellIndex)); writer.write('\n'); - - writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n'); - for (Row row : _newRows) { - row.save(writer, options); - writer.write('\n'); - } - writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n'); - for (Row row : _oldRows) { - row.save(writer, options); - writer.write('\n'); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - String columnName = null; - List columnNames = null; - List rowIndices = null; - List> tuples = null; - boolean removeOriginalColumn = false; - - Column column = null; - int columnIndex = -1; - - int firstNewCellIndex = -1; - List oldRows = null; - List newRows = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if ("columnName".equals(field)) { - columnName = value; - } else if ("columnNameCount".equals(field)) { - int count = Integer.parseInt(value); - - columnNames = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - columnNames.add(line); - } - } - } else if ("rowIndexCount".equals(field)) { - int count = Integer.parseInt(value); - - rowIndices = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - rowIndices.add(Integer.parseInt(line)); - } - } - } else if ("tupleCount".equals(field)) { - int count = Integer.parseInt(value); - - tuples = new ArrayList>(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - - if (line == null) continue; - - int 
valueCount = Integer.parseInt(line); - - List tuple = new ArrayList(valueCount); - for (int r = 0; r < valueCount; r++) { - line = reader.readLine(); - - JSONTokener t = new JSONTokener(line); - Object o = t.nextValue(); - - tuple.add((o != JSONObject.NULL) ? (Serializable) o : null); - } - - tuples.add(tuple); - } - } else if ("removeOriginalColumn".equals(field)) { - removeOriginalColumn = Boolean.parseBoolean(value); - - } else if ("column".equals(field)) { - column = Column.load(value); - } else if ("columnIndex".equals(field)) { - columnIndex = Integer.parseInt(value); - } else if ("firstNewCellIndex".equals(field)) { - firstNewCellIndex = Integer.parseInt(value); - } else if ("oldRowCount".equals(field)) { - int count = Integer.parseInt(value); - - oldRows = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - oldRows.add(Row.load(line, pool)); - } - } - } else if ("newRowCount".equals(field)) { - int count = Integer.parseInt(value); - - newRows = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - newRows.add(Row.load(line, pool)); - } - } - } - - } - - ColumnSplitChange change = new ColumnSplitChange( - columnName, - columnNames, - rowIndices, - tuples, - removeOriginalColumn, - - column, - columnIndex, - - firstNewCellIndex, - oldRows, - newRows - ); - - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/DataExtensionChange.java b/main/src/com/google/gridworks/model/changes/DataExtensionChange.java deleted file mode 100644 index edb2e1d20..000000000 --- a/main/src/com/google/gridworks/model/changes/DataExtensionChange.java +++ /dev/null @@ -1,431 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Serializable; -import java.io.Writer; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import 
java.util.Map; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.ModelException; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.ReconStats; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.model.recon.DataExtensionReconConfig; -import com.google.gridworks.protograph.FreebaseType; -import com.google.gridworks.util.ParsingUtilities; -import com.google.gridworks.util.Pool; -import com.google.gridworks.util.FreebaseDataExtensionJob.DataExtension; - -public class DataExtensionChange implements Change { - final protected String _baseColumnName; - final protected int _columnInsertIndex; - - final protected List _columnNames; - final protected List _columnTypes; - - final protected List _rowIndices; - final protected List _dataExtensions; - - protected long _historyEntryID; - protected int _firstNewCellIndex = -1; - protected List _oldRows; - protected List _newRows; - - public DataExtensionChange( - String baseColumnName, - int columnInsertIndex, - List columnNames, - List columnTypes, - List rowIndices, - List dataExtensions, - long historyEntryID - ) { - _baseColumnName = baseColumnName; - _columnInsertIndex = columnInsertIndex; - - _columnNames = columnNames; - _columnTypes = columnTypes; - - _rowIndices = rowIndices; - _dataExtensions = dataExtensions; - - _historyEntryID = historyEntryID; - } - - protected DataExtensionChange( - String baseColumnName, - int columnInsertIndex, - - List columnNames, - List columnTypes, - - List rowIndices, - List dataExtensions, - int firstNewCellIndex, - List oldRows, - List newRows - ) { - _baseColumnName = 
baseColumnName; - _columnInsertIndex = columnInsertIndex; - - _columnNames = columnNames; - _columnTypes = columnTypes; - - _rowIndices = rowIndices; - _dataExtensions = dataExtensions; - - _firstNewCellIndex = firstNewCellIndex; - _oldRows = oldRows; - _newRows = newRows; - } - - public void apply(Project project) { - synchronized (project) { - if (_firstNewCellIndex < 0) { - _firstNewCellIndex = project.columnModel.allocateNewCellIndex(); - for (int i = 1; i < _columnNames.size(); i++) { - project.columnModel.allocateNewCellIndex(); - } - - _oldRows = new ArrayList(project.rows); - - _newRows = new ArrayList(project.rows.size()); - - int cellIndex = project.columnModel.getColumnByName(_baseColumnName).getCellIndex(); - int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex(); - int index = 0; - - int rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size(); - DataExtension dataExtension = index < _rowIndices.size() ? 
_dataExtensions.get(index) : null; - - index++; - - Map reconMap = new HashMap(); - - for (int r = 0; r < _oldRows.size(); r++) { - Row oldRow = _oldRows.get(r); - if (r < rowIndex) { - _newRows.add(oldRow.dup()); - continue; - } - - if (dataExtension == null || dataExtension.data.length == 0) { - _newRows.add(oldRow); - } else { - Row firstNewRow = oldRow.dup(); - extendRow(firstNewRow, dataExtension, 0, reconMap); - _newRows.add(firstNewRow); - - int r2 = r + 1; - for (int subR = 1; subR < dataExtension.data.length; subR++) { - if (r2 < project.rows.size()) { - Row oldRow2 = project.rows.get(r2); - if (oldRow2.isCellBlank(cellIndex) && - oldRow2.isCellBlank(keyCellIndex)) { - - Row newRow = oldRow2.dup(); - extendRow(newRow, dataExtension, subR, reconMap); - - _newRows.add(newRow); - r2++; - - continue; - } - } - - Row newRow = new Row(cellIndex + _columnNames.size()); - extendRow(newRow, dataExtension, subR, reconMap); - - _newRows.add(newRow); - } - - r = r2 - 1; // r will be incremented by the for loop anyway - } - - rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size(); - dataExtension = index < _rowIndices.size() ? 
_dataExtensions.get(index) : null; - index++; - } - } - - project.rows.clear(); - project.rows.addAll(_newRows); - - for (int i = 0; i < _columnNames.size(); i++) { - String name = _columnNames.get(i); - int cellIndex = _firstNewCellIndex + i; - - Column column = new Column(cellIndex, name); - column.setReconConfig(new DataExtensionReconConfig(_columnTypes.get(i))); - column.setReconStats(ReconStats.create(project, cellIndex)); - - try { - project.columnModel.addColumn(_columnInsertIndex + i, column, true); - - // the column might have been renamed to avoid collision - _columnNames.set(i, column.getName()); - } catch (ModelException e) { - // won't get here since we set the avoid collision flag - } - } - - project.update(); - } - } - - protected void extendRow( - Row row, - DataExtension dataExtension, - int extensionRowIndex, - Map reconMap - ) { - Object[] values = dataExtension.data[extensionRowIndex]; - for (int c = 0; c < values.length; c++) { - Object value = values[c]; - Cell cell = null; - - if (value instanceof ReconCandidate) { - ReconCandidate rc = (ReconCandidate) value; - Recon recon; - if (reconMap.containsKey(rc.id)) { - recon = reconMap.get(rc.id); - } else { - recon = Recon.makeFreebaseRecon(_historyEntryID); - recon.addCandidate(rc); - recon.service = "mql"; - recon.match = rc; - recon.matchRank = 0; - recon.judgment = Judgment.Matched; - recon.judgmentAction = "auto"; - recon.judgmentBatchSize = 1; - - reconMap.put(rc.id, recon); - } - cell = new Cell(rc.name, recon); - } else { - cell = new Cell((Serializable) value, null); - } - - row.setCell(_firstNewCellIndex + c, cell); - } - } - - public void revert(Project project) { - synchronized (project) { - project.rows.clear(); - project.rows.addAll(_oldRows); - - for (int i = 0; i < _columnNames.size(); i++) { - project.columnModel.columns.remove(_columnInsertIndex); - } - - project.update(); - } - } - - public void save(Writer writer, Properties options) throws IOException { - 
writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n'); - writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n'); - writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n'); - for (String name : _columnNames) { - writer.write(name); writer.write('\n'); - } - writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n'); - for (FreebaseType type : _columnTypes) { - try { - JSONWriter jsonWriter = new JSONWriter(writer); - - type.write(jsonWriter, options); - } catch (JSONException e) { - // ??? - } - writer.write('\n'); - } - writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n'); - for (Integer rowIndex : _rowIndices) { - writer.write(rowIndex.toString()); writer.write('\n'); - } - writer.write("dataExtensionCount="); writer.write(Integer.toString(_dataExtensions.size())); writer.write('\n'); - for (DataExtension dataExtension : _dataExtensions) { - if (dataExtension == null) { - writer.write('\n'); - continue; - } - - writer.write(Integer.toString(dataExtension.data.length)); writer.write('\n'); - - for (Object[] values : dataExtension.data) { - for (Object value : values) { - if (value == null) { - writer.write("null"); - } else if (value instanceof ReconCandidate) { - try { - JSONWriter jsonWriter = new JSONWriter(writer); - ((ReconCandidate) value).write(jsonWriter, options); - } catch (JSONException e) { - // ??? 
- } - } else if (value instanceof String) { - writer.write(JSONObject.quote((String) value)); - } else { - writer.write(value.toString()); - } - writer.write('\n'); - } - } - } - - writer.write("firstNewCellIndex="); writer.write(Integer.toString(_firstNewCellIndex)); writer.write('\n'); - - writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n'); - for (Row row : _newRows) { - row.save(writer, options); - writer.write('\n'); - } - writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n'); - for (Row row : _oldRows) { - row.save(writer, options); - writer.write('\n'); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - String baseColumnName = null; - int columnInsertIndex = -1; - - List columnNames = null; - List columnTypes = null; - - List rowIndices = null; - List dataExtensions = null; - - List oldRows = null; - List newRows = null; - - int firstNewCellIndex = -1; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if ("baseColumnName".equals(field)) { - baseColumnName = value; - } else if ("columnInsertIndex".equals(field)) { - columnInsertIndex = Integer.parseInt(value); - } else if ("firstNewCellIndex".equals(field)) { - firstNewCellIndex = Integer.parseInt(value); - } else if ("rowIndexCount".equals(field)) { - int count = Integer.parseInt(value); - - rowIndices = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - rowIndices.add(Integer.parseInt(line)); - } - } - } else if ("columnNameCount".equals(field)) { - int count = Integer.parseInt(value); - - columnNames = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != 
null) { - columnNames.add(line); - } - } - } else if ("columnTypeCount".equals(field)) { - int count = Integer.parseInt(value); - - columnTypes = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - columnTypes.add(FreebaseType.load(ParsingUtilities.evaluateJsonStringToObject(line))); - } - } else if ("dataExtensionCount".equals(field)) { - int count = Integer.parseInt(value); - - dataExtensions = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - - if (line == null) continue; - - if (line.length() == 0) { - dataExtensions.add(null); - continue; - } - - int rowCount = Integer.parseInt(line); - Object[][] data = new Object[rowCount][]; - - for (int r = 0; r < rowCount; r++) { - Object[] row = new Object[columnNames.size()]; - for (int c = 0; c < columnNames.size(); c++) { - line = reader.readLine(); - - row[c] = ReconCandidate.loadStreaming(line); - } - - data[r] = row; - } - - dataExtensions.add(new DataExtension(data)); - } - } else if ("oldRowCount".equals(field)) { - int count = Integer.parseInt(value); - - oldRows = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - oldRows.add(Row.load(line, pool)); - } - } - } else if ("newRowCount".equals(field)) { - int count = Integer.parseInt(value); - - newRows = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - newRows.add(Row.load(line, pool)); - } - } - } - - } - - DataExtensionChange change = new DataExtensionChange( - baseColumnName, - columnInsertIndex, - columnNames, - columnTypes, - rowIndices, - dataExtensions, - firstNewCellIndex, - oldRows, - newRows - ); - - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/MassCellChange.java b/main/src/com/google/gridworks/model/changes/MassCellChange.java deleted file mode 100644 index f0ac8b4ed..000000000 --- 
a/main/src/com/google/gridworks/model/changes/MassCellChange.java +++ /dev/null @@ -1,129 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.Pool; - -public class MassCellChange implements Change { - final protected CellChange[] _cellChanges; - final protected String _commonColumnName; - final protected boolean _updateRowContextDependencies; - - public MassCellChange( - CellChange[] cellChanges, - String commonColumnName, - boolean updateRowContextDependencies) { - - _cellChanges = cellChanges; - _commonColumnName = commonColumnName; - _updateRowContextDependencies = updateRowContextDependencies; - } - - public MassCellChange( - List cellChanges, - String commonColumnName, - boolean updateRowContextDependencies) { - - _cellChanges = new CellChange[cellChanges.size()]; - _commonColumnName = commonColumnName; - cellChanges.toArray(_cellChanges); - - _updateRowContextDependencies = updateRowContextDependencies; - } - - public MassCellChange(CellChange cellChange, String commonColumnName, boolean updateRowContextDependencies) { - _cellChanges = new CellChange[1]; - _cellChanges[0] = cellChange; - - _commonColumnName = commonColumnName; - - _updateRowContextDependencies = updateRowContextDependencies; - } - - public void apply(Project project) { - synchronized (project) { - List rows = project.rows; - - for (CellChange cellChange : _cellChanges) { - rows.get(cellChange.row).setCell(cellChange.cellIndex, cellChange.newCell); - } - - if (_commonColumnName != null) { - Column column = project.columnModel.getColumnByName(_commonColumnName); - column.clearPrecomputes(); - } - - if (_updateRowContextDependencies) { - 
project.update(); - } - } - } - - public void revert(Project project) { - synchronized (project) { - List rows = project.rows; - - for (CellChange cellChange : _cellChanges) { - rows.get(cellChange.row).setCell(cellChange.cellIndex, cellChange.oldCell); - } - - if (_commonColumnName != null) { - Column column = project.columnModel.getColumnByName(_commonColumnName); - column.clearPrecomputes(); - } - - if (_updateRowContextDependencies) { - project.update(); - } - } - } - - public void save(Writer writer, Properties options) throws IOException { - if (_commonColumnName != null) { - writer.write("commonColumnName="); writer.write(_commonColumnName); writer.write('\n'); - } - writer.write("updateRowContextDependencies="); writer.write(Boolean.toString(_updateRowContextDependencies)); writer.write('\n'); - writer.write("cellChangeCount="); writer.write(Integer.toString(_cellChanges.length)); writer.write('\n'); - for (CellChange c : _cellChanges) { - c.save(writer, options); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - String commonColumnName = null; - boolean updateRowContextDependencies = false; - CellChange[] cellChanges = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - - if ("commonColumnName".equals(field)) { - commonColumnName = line.substring(equal + 1); - } else if ("updateRowContextDependencies".equals(field)) { - updateRowContextDependencies = Boolean.parseBoolean(line.substring(equal + 1)); - } else if ("cellChangeCount".equals(field)) { - int cellChangeCount = Integer.parseInt(line.substring(equal + 1)); - - cellChanges = new CellChange[cellChangeCount]; - for (int i = 0; i < cellChangeCount; i++) { - cellChanges[i] = CellChange.load(reader, pool); - } - } - } - - MassCellChange change = new MassCellChange(cellChanges, 
commonColumnName, updateRowContextDependencies); - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/MassChange.java b/main/src/com/google/gridworks/model/changes/MassChange.java deleted file mode 100644 index 14fbf1be0..000000000 --- a/main/src/com/google/gridworks/model/changes/MassChange.java +++ /dev/null @@ -1,82 +0,0 @@ -package com.google.gridworks.model.changes; - - import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.history.History; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.Pool; - -public class MassChange implements Change { - final protected List _changes; - final protected boolean _updateRowContextDependencies; - - public MassChange(List changes, boolean updateRowContextDependencies) { - _changes = changes; - _updateRowContextDependencies = updateRowContextDependencies; - } - - public void apply(Project project) { - synchronized (project) { - for (Change change : _changes) { - change.apply(project); - } - - if (_updateRowContextDependencies) { - project.update(); - } - } - } - - public void revert(Project project) { - synchronized (project) { - for (Change change : _changes) { - change.revert(project); - } - - if (_updateRowContextDependencies) { - project.update(); - } - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("updateRowContextDependencies="); writer.write(Boolean.toString(_updateRowContextDependencies)); writer.write('\n'); - writer.write("changeCount="); writer.write(Integer.toString(_changes.size())); writer.write('\n'); - for (Change c : _changes) { - History.writeOneChange(writer, c, options); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - 
boolean updateRowContextDependencies = false; - List changes = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - - if ("updateRowContextDependencies".equals(field)) { - updateRowContextDependencies = Boolean.parseBoolean(line.substring(equal + 1)); - } else if ("changeCount".equals(field)) { - int changeCount = Integer.parseInt(line.substring(equal + 1)); - - changes = new ArrayList(changeCount); - for (int i = 0; i < changeCount; i++) { - changes.add(History.readOneChange(reader, pool)); - } - } - } - - MassChange change = new MassChange(changes, updateRowContextDependencies); - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/MassReconChange.java b/main/src/com/google/gridworks/model/changes/MassReconChange.java deleted file mode 100644 index 5d391dd24..000000000 --- a/main/src/com/google/gridworks/model/changes/MassReconChange.java +++ /dev/null @@ -1,111 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.Pool; - -public class MassReconChange implements Change { - final protected Map _newRecons; - final protected Map _oldRecons; - - public MassReconChange(Map newRecons, Map oldRecons) { - _newRecons = newRecons; - _oldRecons = oldRecons; - } - - public void apply(Project project) { - switchRecons(project, _newRecons); - } - - public void revert(Project project) { - switchRecons(project, _oldRecons); - } - - protected void 
switchRecons(Project project, Map reconMap) { - synchronized (project) { - for (int r = 0; r < project.rows.size(); r++) { - Row row = project.rows.get(r); - - for (int c = 0; c < row.cells.size(); c++) { - Cell cell = row.cells.get(c); - if (cell != null && cell.recon != null) { - Recon recon = cell.recon; - - if (reconMap.containsKey(recon.id)) { - row.setCell(c, new Cell(cell.value, reconMap.get(recon.id))); - } - } - } - } - } - } - - public void save(Writer writer, Properties options) throws IOException { - writeRecons(writer, options, _oldRecons, "oldReconCount"); - writeRecons(writer, options, _newRecons, "newReconCount"); - writer.write("/ec/\n"); // end of change marker - } - - protected void writeRecons(Writer writer, Properties options, Map recons, String key) throws IOException { - writer.write(key + "="); writer.write(Integer.toString(recons.size())); writer.write('\n'); - for (Recon recon : recons.values()) { - Pool pool = (Pool) options.get("pool"); - pool.poolReconCandidates(recon); - - JSONWriter jsonWriter = new JSONWriter(writer); - try { - recon.write(jsonWriter, options); - } catch (JSONException e) { - e.printStackTrace(); - } - writer.write("\n"); - } - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - Map oldRecons = new HashMap(); - Map newRecons = new HashMap(); - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if ("oldReconCount".equals(field)) { - loadRecons(reader, pool, oldRecons, value); - } else if ("newReconCount".equals(field)) { - loadRecons(reader, pool, newRecons, value); - } - } - - MassReconChange change = new MassReconChange(newRecons, oldRecons); - - return change; - } - - static protected void loadRecons(LineNumberReader reader, Pool pool, Map recons, String countString) throws Exception { - int count = 
Integer.parseInt(countString); - - for (int i = 0; i < count; i++) { - String line = reader.readLine(); - Recon recon = Recon.loadStreaming(line, pool); - - recons.put(recon.id, recon); - } - } -} - diff --git a/main/src/com/google/gridworks/model/changes/MassRowChange.java b/main/src/com/google/gridworks/model/changes/MassRowChange.java deleted file mode 100644 index 91d6b4e1f..000000000 --- a/main/src/com/google/gridworks/model/changes/MassRowChange.java +++ /dev/null @@ -1,93 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.Pool; - -public class MassRowChange implements Change { - final protected List _newRows; - protected List _oldRows; - - public MassRowChange(List newRows) { - _newRows = newRows; - } - - public void apply(Project project) { - synchronized (project) { - _oldRows = new ArrayList(project.rows); - project.rows.clear(); - project.rows.addAll(_newRows); - - project.update(); - } - } - - public void revert(Project project) { - synchronized (project) { - project.rows.clear(); - project.rows.addAll(_oldRows); - - project.update(); - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n'); - for (Row row : _newRows) { - row.save(writer, options); - writer.write('\n'); - } - writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n'); - for (Row row : _oldRows) { - row.save(writer, options); - writer.write('\n'); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - 
List oldRows = null; - List newRows = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - - if ("oldRowCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - - oldRows = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - oldRows.add(Row.load(line, pool)); - } - } - } else if ("newRowCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - - newRows = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - newRows.add(Row.load(line, pool)); - } - } - } - } - - MassRowChange change = new MassRowChange(newRows); - change._oldRows = oldRows; - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/MassRowColumnChange.java b/main/src/com/google/gridworks/model/changes/MassRowColumnChange.java deleted file mode 100644 index d51e4985e..000000000 --- a/main/src/com/google/gridworks/model/changes/MassRowColumnChange.java +++ /dev/null @@ -1,139 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.Pool; - -public class MassRowColumnChange implements Change { - final protected List _newColumns; - final protected List _newRows; - protected List _oldColumns; - protected List _oldRows; - - public MassRowColumnChange(List newColumns, List newRows) { - _newColumns = newColumns; - _newRows = newRows; - } - - public void apply(Project project) { - synchronized (project) { - _oldColumns = new 
ArrayList(project.columnModel.columns); - _oldRows = new ArrayList(project.rows); - - project.columnModel.columns.clear(); - project.columnModel.columns.addAll(_newColumns); - - project.rows.clear(); - project.rows.addAll(_newRows); - - project.update(); - } - } - - public void revert(Project project) { - synchronized (project) { - project.columnModel.columns.clear(); - project.columnModel.columns.addAll(_oldColumns); - - project.rows.clear(); - project.rows.addAll(_oldRows); - - project.update(); - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("newColumnCount="); writer.write(Integer.toString(_newColumns.size())); writer.write('\n'); - for (Column column : _newColumns) { - column.save(writer); - writer.write('\n'); - } - writer.write("oldColumnCount="); writer.write(Integer.toString(_oldColumns.size())); writer.write('\n'); - for (Column column : _oldColumns) { - column.save(writer); - writer.write('\n'); - } - writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n'); - for (Row row : _newRows) { - row.save(writer, options); - writer.write('\n'); - } - writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n'); - for (Row row : _oldRows) { - row.save(writer, options); - writer.write('\n'); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - List oldColumns = null; - List newColumns = null; - - List oldRows = null; - List newRows = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - - if ("oldRowCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - - oldRows = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - 
oldRows.add(Row.load(line, pool)); - } - } - } else if ("newRowCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - - newRows = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - newRows.add(Row.load(line, pool)); - } - } - } else if ("oldColumnCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - - oldColumns = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - oldColumns.add(Column.load(line)); - } - } - } else if ("newColumnCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - - newColumns = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - newColumns.add(Column.load(line)); - } - } - } - } - - MassRowColumnChange change = new MassRowColumnChange(newColumns, newRows); - change._oldColumns = oldColumns; - change._oldRows = oldRows; - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/ReconChange.java b/main/src/com/google/gridworks/model/changes/ReconChange.java deleted file mode 100644 index 2077897ad..000000000 --- a/main/src/com/google/gridworks/model/changes/ReconChange.java +++ /dev/null @@ -1,174 +0,0 @@ -/** - * - */ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.ReconStats; -import com.google.gridworks.model.recon.ReconConfig; -import com.google.gridworks.util.ParsingUtilities; -import com.google.gridworks.util.Pool; - -public class ReconChange extends MassCellChange { - final protected ReconConfig _newReconConfig; - protected ReconStats _newReconStats; - 
- protected ReconConfig _oldReconConfig; - protected ReconStats _oldReconStats; - - public ReconChange( - List cellChanges, - String commonColumnName, - ReconConfig newReconConfig, - ReconStats newReconStats // can be null - ) { - super(cellChanges, commonColumnName, false); - _newReconConfig = newReconConfig; - _newReconStats = newReconStats; - } - - public ReconChange( - CellChange[] cellChanges, - String commonColumnName, - ReconConfig newReconConfig, - ReconStats newReconStats // can be null - ) { - super(cellChanges, commonColumnName, false); - _newReconConfig = newReconConfig; - _newReconStats = newReconStats; - } - - public ReconChange( - CellChange cellChange, - String commonColumnName, - ReconConfig newReconConfig, - ReconStats newReconStats // can be null - ) { - super(cellChange, commonColumnName, false); - _newReconConfig = newReconConfig; - _newReconStats = newReconStats; - } - - @Override - public void apply(Project project) { - synchronized (project) { - super.apply(project); - - Column column = project.columnModel.getColumnByName(_commonColumnName); - - if (_newReconStats == null) { - _newReconStats = ReconStats.create(project, column.getCellIndex()); - } - - _oldReconConfig = column.getReconConfig(); - _oldReconStats = column.getReconStats(); - - column.setReconConfig(_newReconConfig); - column.setReconStats(_newReconStats); - - column.clearPrecomputes(); - } - } - - @Override - public void revert(Project project) { - synchronized (project) { - super.revert(project); - - Column column = project.columnModel.getColumnByName(_commonColumnName); - column.setReconConfig(_oldReconConfig); - column.setReconStats(_oldReconStats); - - column.clearPrecomputes(); - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("newReconConfig="); - if (_newReconConfig != null) { - _newReconConfig.save(writer); - } - writer.write('\n'); - - writer.write("newReconStats="); - if (_newReconStats != null) { - 
_newReconStats.save(writer); - } - writer.write('\n'); - - writer.write("oldReconConfig="); - if (_oldReconConfig != null) { - _oldReconConfig.save(writer); - } - writer.write('\n'); - - writer.write("oldReconStats="); - if (_oldReconStats != null) { - _oldReconStats.save(writer); - } - writer.write('\n'); - - super.save(writer, options); - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - ReconConfig newReconConfig = null; - ReconStats newReconStats = null; - ReconConfig oldReconConfig = null; - ReconStats oldReconStats = null; - - String commonColumnName = null; - CellChange[] cellChanges = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if ("newReconConfig".equals(field)) { - if (value.length() > 0) { - newReconConfig = ReconConfig.reconstruct(ParsingUtilities.evaluateJsonStringToObject(value)); - } - } else if ("newReconStats".equals(field)) { - if (value.length() > 0) { - newReconStats = ReconStats.load(ParsingUtilities.evaluateJsonStringToObject(value)); - } - } else if ("oldReconConfig".equals(field)) { - if (value.length() > 0) { - oldReconConfig = ReconConfig.reconstruct(ParsingUtilities.evaluateJsonStringToObject(value)); - } - } else if ("oldReconStats".equals(field)) { - if (value.length() > 0) { - oldReconStats = ReconStats.load(ParsingUtilities.evaluateJsonStringToObject(value)); - } - } else if ("commonColumnName".equals(field)) { - commonColumnName = value; - } else if ("cellChangeCount".equals(field)) { - int cellChangeCount = Integer.parseInt(value); - - cellChanges = new CellChange[cellChangeCount]; - for (int i = 0; i < cellChangeCount; i++) { - cellChanges[i] = CellChange.load(reader, pool); - } - } - } - - ReconChange change = new ReconChange( - cellChanges, commonColumnName, newReconConfig, newReconStats); - - 
change._oldReconConfig = oldReconConfig; - change._oldReconStats = oldReconStats; - - return change; - } -} \ No newline at end of file diff --git a/main/src/com/google/gridworks/model/changes/RowFlagChange.java b/main/src/com/google/gridworks/model/changes/RowFlagChange.java deleted file mode 100644 index d60ff5aae..000000000 --- a/main/src/com/google/gridworks/model/changes/RowFlagChange.java +++ /dev/null @@ -1,69 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.Pool; - -public class RowFlagChange implements Change { - final int rowIndex; - final boolean newFlagged; - Boolean oldFlagged = null; - - public RowFlagChange(int rowIndex, boolean newFlagged) { - this.rowIndex = rowIndex; - this.newFlagged = newFlagged; - } - - public void apply(Project project) { - Row row = project.rows.get(rowIndex); - if (oldFlagged == null) { - oldFlagged = row.flagged; - } - row.flagged = newFlagged; - } - - public void revert(Project project) { - Row row = project.rows.get(rowIndex); - - row.flagged = oldFlagged; - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("row="); writer.write(Integer.toString(rowIndex)); writer.write('\n'); - writer.write("newFlagged="); writer.write(Boolean.toString(newFlagged)); writer.write('\n'); - writer.write("oldFlagged="); writer.write(Boolean.toString(oldFlagged)); writer.write('\n'); - writer.write("/ec/\n"); // end of change marker - } - - static public RowFlagChange load(LineNumberReader reader, Pool pool) throws Exception { - int row = -1; - boolean oldFlagged = false; - boolean newFlagged = false; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = 
line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if ("row".equals(field)) { - row = Integer.parseInt(value); - } else if ("oldFlagged".equals(field)) { - oldFlagged = Boolean.parseBoolean(value); - } else if ("newFlagged".equals(field)) { - oldFlagged = Boolean.parseBoolean(value); - } - } - - RowFlagChange change = new RowFlagChange(row, newFlagged); - change.oldFlagged = oldFlagged; - - return change; - } -} \ No newline at end of file diff --git a/main/src/com/google/gridworks/model/changes/RowRemovalChange.java b/main/src/com/google/gridworks/model/changes/RowRemovalChange.java deleted file mode 100644 index 067082e61..000000000 --- a/main/src/com/google/gridworks/model/changes/RowRemovalChange.java +++ /dev/null @@ -1,109 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.Pool; - -public class RowRemovalChange implements Change { - final protected List _rowIndices; - protected List _rows; - - public RowRemovalChange(List rowIndices) { - _rowIndices = rowIndices; - } - - public void apply(Project project) { - synchronized (project) { - int count = _rowIndices.size(); - - _rows = new ArrayList(count); - - int offset = 0; - for (int i = 0; i < count; i++) { - int index = _rowIndices.get(i); - - Row row = project.rows.remove(index + offset); - _rows.add(row); - - offset--; - } - - project.update(); - } - } - - public void revert(Project project) { - synchronized (project) { - int count = _rowIndices.size(); - - for (int i = 0; i < count; i++) { - int index = _rowIndices.get(i); - Row row = _rows.get(i); - - project.rows.add(index, row); - } - - project.update(); - 
} - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n'); - for (Integer index : _rowIndices) { - writer.write(index.toString()); - writer.write('\n'); - } - writer.write("rowCount="); writer.write(Integer.toString(_rows.size())); writer.write('\n'); - for (Row row : _rows) { - row.save(writer, options); - writer.write('\n'); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - List rowIndices = null; - List rows = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - - if ("rowIndexCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - - rowIndices = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - rowIndices.add(Integer.parseInt(line)); - } - } - } else if ("rowCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - - rows = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - rows.add(Row.load(line, pool)); - } - } - } - } - - RowRemovalChange change = new RowRemovalChange(rowIndices); - change._rows = rows; - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/RowReorderChange.java b/main/src/com/google/gridworks/model/changes/RowReorderChange.java deleted file mode 100644 index 035ba963e..000000000 --- a/main/src/com/google/gridworks/model/changes/RowReorderChange.java +++ /dev/null @@ -1,94 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - 
-import com.google.gridworks.history.Change; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.Pool; - -public class RowReorderChange implements Change { - final protected List _rowIndices; - - public RowReorderChange(List rowIndices) { - _rowIndices = rowIndices; - } - - public void apply(Project project) { - synchronized (project) { - List oldRows = project.rows; - List newRows = new ArrayList(oldRows.size()); - - for (Integer oldIndex : _rowIndices) { - newRows.add(oldRows.get(oldIndex)); - } - - project.rows.clear(); - project.rows.addAll(newRows); - project.update(); - } - } - - public void revert(Project project) { - synchronized (project) { - int count = project.rows.size(); - - List newRows = project.rows; - List oldRows = new ArrayList(count); - - for (int r = 0; r < count; r++) { - oldRows.add(null); - } - - for (int newIndex = 0; newIndex < count; newIndex++) { - int oldIndex = _rowIndices.get(newIndex); - Row row = newRows.get(newIndex); - oldRows.set(oldIndex, row); - } - - project.rows.clear(); - project.rows.addAll(oldRows); - project.update(); - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n'); - for (Integer index : _rowIndices) { - writer.write(index.toString()); - writer.write('\n'); - } - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - List rowIndices = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - - if ("rowIndexCount".equals(field)) { - int count = Integer.parseInt(line.substring(equal + 1)); - - rowIndices = new ArrayList(count); - for (int i = 0; i < count; i++) { - line = reader.readLine(); - if (line != null) { - 
rowIndices.add(Integer.parseInt(line)); - } - } - } - } - - RowReorderChange change = new RowReorderChange(rowIndices); - - return change; - } -} diff --git a/main/src/com/google/gridworks/model/changes/RowStarChange.java b/main/src/com/google/gridworks/model/changes/RowStarChange.java deleted file mode 100644 index 1ad7c41b5..000000000 --- a/main/src/com/google/gridworks/model/changes/RowStarChange.java +++ /dev/null @@ -1,69 +0,0 @@ -package com.google.gridworks.model.changes; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.Properties; - -import com.google.gridworks.history.Change; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.util.Pool; - -public class RowStarChange implements Change { - final int rowIndex; - final boolean newStarred; - Boolean oldStarred = null; - - public RowStarChange(int rowIndex, boolean newStarred) { - this.rowIndex = rowIndex; - this.newStarred = newStarred; - } - - public void apply(Project project) { - Row row = project.rows.get(rowIndex); - if (oldStarred == null) { - oldStarred = row.starred; - } - row.starred = newStarred; - } - - public void revert(Project project) { - Row row = project.rows.get(rowIndex); - - row.starred = oldStarred; - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("row="); writer.write(Integer.toString(rowIndex)); writer.write('\n'); - writer.write("newStarred="); writer.write(Boolean.toString(newStarred)); writer.write('\n'); - writer.write("oldStarred="); writer.write(Boolean.toString(oldStarred)); writer.write('\n'); - writer.write("/ec/\n"); // end of change marker - } - - static public RowStarChange load(LineNumberReader reader, Pool pool) throws Exception { - int row = -1; - boolean oldStarred = false; - boolean newStarred = false; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = 
line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if ("row".equals(field)) { - row = Integer.parseInt(value); - } else if ("oldStarred".equals(field)) { - oldStarred = Boolean.parseBoolean(value); - } else if ("newStarred".equals(field)) { - oldStarred = Boolean.parseBoolean(value); - } - } - - RowStarChange change = new RowStarChange(row, newStarred); - change.oldStarred = oldStarred; - - return change; - } -} \ No newline at end of file diff --git a/main/src/com/google/gridworks/model/recon/DataExtensionReconConfig.java b/main/src/com/google/gridworks/model/recon/DataExtensionReconConfig.java deleted file mode 100644 index a8bd9f492..000000000 --- a/main/src/com/google/gridworks/model/recon/DataExtensionReconConfig.java +++ /dev/null @@ -1,63 +0,0 @@ -package com.google.gridworks.model.recon; - -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.Row; -import com.google.gridworks.protograph.FreebaseType; - -public class DataExtensionReconConfig extends StrictReconConfig { - final public FreebaseType type; - - private final static String WARN = "Not implemented"; - - static public ReconConfig reconstruct(JSONObject obj) throws Exception { - JSONObject type = obj.getJSONObject("type"); - - return new DataExtensionReconConfig( - new FreebaseType( - type.getString("id"), - type.getString("name") - ) - ); - } - - public DataExtensionReconConfig(FreebaseType type) { - this.type = type; - } - - @Override - public ReconJob createJob(Project project, int rowIndex, Row row, - String columnName, Cell cell) { - throw new RuntimeException(WARN); - } - - @Override - public int getBatchSize() { - throw new RuntimeException(WARN); - } - - public void 
write(JSONWriter writer, Properties options) throws JSONException { - writer.object(); - writer.key("mode"); writer.value("extend"); - writer.key("type"); type.write(writer, options); - writer.endObject(); - } - - @Override - public List batchRecon(List jobs, long historyEntryID) { - throw new RuntimeException(WARN); - } - - @Override - public String getBriefDescription(Project project, String columnName) { - throw new RuntimeException(WARN); - } -} diff --git a/main/src/com/google/gridworks/model/recon/GuidBasedReconConfig.java b/main/src/com/google/gridworks/model/recon/GuidBasedReconConfig.java deleted file mode 100644 index a1d53064b..000000000 --- a/main/src/com/google/gridworks/model/recon/GuidBasedReconConfig.java +++ /dev/null @@ -1,174 +0,0 @@ -package com.google.gridworks.model.recon; - -import java.io.InputStream; -import java.io.StringWriter; -import java.net.URL; -import java.net.URLConnection; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.util.ParsingUtilities; - -public class GuidBasedReconConfig extends StrictReconConfig { - static public ReconConfig reconstruct(JSONObject obj) throws Exception { - return new GuidBasedReconConfig(); - } - - public GuidBasedReconConfig() { - } - - static protected class GuidBasedReconJob extends ReconJob { - String guid; - - public int getKey() { - return guid.hashCode(); - } - } - - @Override - public ReconJob createJob(Project project, int rowIndex, Row row, - String columnName, Cell cell) { - - GuidBasedReconJob job 
= new GuidBasedReconJob(); - String s = cell.value.toString(); - - if (s.startsWith("/guid/")) { - s = "#" + s.substring(6); - } else if (!s.startsWith("#")) { - s = "#" + s; - } - - job.guid = s; - - return job; - } - - @Override - public int getBatchSize() { - return 10; - } - - @Override - public String getBriefDescription(Project project, String columnName) { - return "Reconcile cells in column " + columnName + " as Freebase IDs"; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("mode"); writer.value("strict"); - writer.key("match"); writer.value("id"); - writer.endObject(); - } - - @Override - public List batchRecon(List jobs, long historyEntryID) { - List recons = new ArrayList(jobs.size()); - Map guidToRecon = new HashMap(); - - try { - String query = null; - { - StringWriter stringWriter = new StringWriter(); - JSONWriter jsonWriter = new JSONWriter(stringWriter); - - jsonWriter.object(); - jsonWriter.key("query"); - jsonWriter.array(); - jsonWriter.object(); - - jsonWriter.key("id"); jsonWriter.value(null); - jsonWriter.key("name"); jsonWriter.value(null); - jsonWriter.key("guid"); jsonWriter.value(null); - jsonWriter.key("type"); jsonWriter.array(); jsonWriter.endArray(); - - jsonWriter.key("guid|="); - jsonWriter.array(); - for (ReconJob job : jobs) { - jsonWriter.value(((GuidBasedReconJob) job).guid); - } - jsonWriter.endArray(); - - jsonWriter.endObject(); - jsonWriter.endArray(); - jsonWriter.endObject(); - - query = stringWriter.toString(); - } - - StringBuffer sb = new StringBuffer(1024); - sb.append(s_mqlreadService); - sb.append("?query="); - sb.append(ParsingUtilities.encode(query)); - - URL url = new URL(sb.toString()); - URLConnection connection = url.openConnection(); - connection.setConnectTimeout(5000); - connection.connect(); - - InputStream is = connection.getInputStream(); - try { - String s = ParsingUtilities.inputStreamToString(is); - JSONObject o = 
ParsingUtilities.evaluateJsonStringToObject(s); - JSONArray results = o.getJSONArray("result"); - int count = results.length(); - - for (int i = 0; i < count; i++) { - JSONObject result = results.getJSONObject(i); - - String guid = result.getString("guid"); - - JSONArray types = result.getJSONArray("type"); - String[] typeIDs = new String[types.length()]; - for (int j = 0; j < typeIDs.length; j++) { - typeIDs[j] = types.getString(j); - } - - ReconCandidate candidate = new ReconCandidate( - result.getString("id"), - result.getString("name"), - typeIDs, - 100 - ); - - Recon recon = Recon.makeFreebaseRecon(historyEntryID); - recon.addCandidate(candidate); - recon.service = "mql"; - recon.judgment = Judgment.Matched; - recon.judgmentAction = "auto"; - recon.match = candidate; - recon.matchRank = 0; - - guidToRecon.put(guid, recon); - } - } finally { - is.close(); - } - } catch (Exception e) { - e.printStackTrace(); - } - - for (int i = 0; i < jobs.size(); i++) { - String guid = ((GuidBasedReconJob) jobs.get(i)).guid; - Recon recon = guidToRecon.get(guid); - recons.add(recon); - } - - return recons; - } -} diff --git a/main/src/com/google/gridworks/model/recon/IdBasedReconConfig.java b/main/src/com/google/gridworks/model/recon/IdBasedReconConfig.java deleted file mode 100644 index 6bb78ec4b..000000000 --- a/main/src/com/google/gridworks/model/recon/IdBasedReconConfig.java +++ /dev/null @@ -1,179 +0,0 @@ -package com.google.gridworks.model.recon; - -import java.io.InputStream; -import java.io.StringWriter; -import java.net.URL; -import java.net.URLConnection; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import 
com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.util.ParsingUtilities; - -public class IdBasedReconConfig extends StrictReconConfig { - static public ReconConfig reconstruct(JSONObject obj) throws Exception { - return new IdBasedReconConfig(); - } - - public IdBasedReconConfig() { - } - - static protected class IdBasedReconJob extends ReconJob { - String id; - - public int getKey() { - return id.hashCode(); - } - } - - @Override - public ReconJob createJob(Project project, int rowIndex, Row row, - String columnName, Cell cell) { - - IdBasedReconJob job = new IdBasedReconJob(); - String s = cell.value.toString(); - - if (!s.startsWith("/")) { - if (s.startsWith("92")) { - s = "/guid/" + s; - } else if (!s.contains("/")){ - s = "/en/" + s; - } else { - s = "/" + s; - } - } - - job.id = s; - - return job; - } - - @Override - public int getBatchSize() { - return 10; - } - - @Override - public String getBriefDescription(Project project, String columnName) { - return "Reconcile cells in column " + columnName + " as Freebase IDs"; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("mode"); writer.value("strict"); - writer.key("match"); writer.value("id"); - writer.endObject(); - } - - @Override - public List batchRecon(List jobs, long historyEntryID) { - List recons = new ArrayList(jobs.size()); - Map idToRecon = new HashMap(); - - try { - String query = null; - { - StringWriter stringWriter = new StringWriter(); - JSONWriter jsonWriter = new JSONWriter(stringWriter); - - jsonWriter.object(); - jsonWriter.key("query"); - jsonWriter.array(); - jsonWriter.object(); - - jsonWriter.key("id"); jsonWriter.value(null); - jsonWriter.key("name"); jsonWriter.value(null); - jsonWriter.key("guid"); jsonWriter.value(null); - jsonWriter.key("type"); jsonWriter.array(); jsonWriter.endArray(); - - 
jsonWriter.key("id|="); - jsonWriter.array(); - for (ReconJob job : jobs) { - jsonWriter.value(((IdBasedReconJob) job).id); - } - jsonWriter.endArray(); - - jsonWriter.endObject(); - jsonWriter.endArray(); - jsonWriter.endObject(); - - query = stringWriter.toString(); - } - - StringBuffer sb = new StringBuffer(1024); - sb.append(s_mqlreadService); - sb.append("?query="); - sb.append(ParsingUtilities.encode(query)); - - URL url = new URL(sb.toString()); - URLConnection connection = url.openConnection(); - connection.setConnectTimeout(5000); - connection.connect(); - - InputStream is = connection.getInputStream(); - try { - String s = ParsingUtilities.inputStreamToString(is); - JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); - JSONArray results = o.getJSONArray("result"); - int count = results.length(); - - for (int i = 0; i < count; i++) { - JSONObject result = results.getJSONObject(i); - - String id = result.getString("id"); - - JSONArray types = result.getJSONArray("type"); - String[] typeIDs = new String[types.length()]; - for (int j = 0; j < typeIDs.length; j++) { - typeIDs[j] = types.getString(j); - } - - ReconCandidate candidate = new ReconCandidate( - id, - result.getString("name"), - typeIDs, - 100 - ); - - Recon recon = Recon.makeFreebaseRecon(historyEntryID); - recon.addCandidate(candidate); - recon.service = "mql"; - recon.judgment = Judgment.Matched; - recon.judgmentAction = "auto"; - recon.match = candidate; - recon.matchRank = 0; - - idToRecon.put(id, recon); - } - } finally { - is.close(); - } - } catch (Exception e) { - e.printStackTrace(); - } - - for (int i = 0; i < jobs.size(); i++) { - String id = ((IdBasedReconJob) jobs.get(i)).id; - Recon recon = idToRecon.get(id); - recons.add(recon); - } - - return recons; - } - -} diff --git a/main/src/com/google/gridworks/model/recon/KeyBasedReconConfig.java b/main/src/com/google/gridworks/model/recon/KeyBasedReconConfig.java deleted file mode 100644 index e65b6453f..000000000 --- 
a/main/src/com/google/gridworks/model/recon/KeyBasedReconConfig.java +++ /dev/null @@ -1,193 +0,0 @@ -package com.google.gridworks.model.recon; - -import java.io.InputStream; -import java.io.StringWriter; -import java.net.URL; -import java.net.URLConnection; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.protograph.FreebaseTopic; -import com.google.gridworks.util.ParsingUtilities; - -public class KeyBasedReconConfig extends StrictReconConfig { - final public FreebaseTopic namespace; - - static public ReconConfig reconstruct(JSONObject obj) throws Exception { - JSONObject ns = obj.getJSONObject("namespace"); - - return new KeyBasedReconConfig( - new FreebaseTopic( - ns.getString("id"), - ns.getString("name") - ) - ); - } - - public KeyBasedReconConfig(FreebaseTopic namespace) { - this.namespace = namespace; - } - - static protected class KeyBasedReconJob extends ReconJob { - String key; - - public int getKey() { - return key.hashCode(); - } - } - - @Override - public ReconJob createJob(Project project, int rowIndex, Row row, - String columnName, Cell cell) { - - KeyBasedReconJob job = new KeyBasedReconJob(); - - job.key = cell.value.toString().replace(' ', '_'); - - return job; - } - - @Override - public int getBatchSize() { - return 10; - } - - @Override - public String getBriefDescription(Project project, String columnName) { - return "Reconcile cells in column " + columnName + " to topics with keys in namespace " + namespace.id; - } - - public void write(JSONWriter 
writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("mode"); writer.value("strict"); - writer.key("match"); writer.value("key"); - writer.key("namespace"); namespace.write(writer, options); - writer.endObject(); - } - - @Override - public List batchRecon(List jobs, long historyEntryID) { - List recons = new ArrayList(jobs.size()); - Map keyToRecon = new HashMap(); - - try { - String query = null; - { - StringWriter stringWriter = new StringWriter(); - JSONWriter jsonWriter = new JSONWriter(stringWriter); - - jsonWriter.object(); - jsonWriter.key("query"); - jsonWriter.array(); - jsonWriter.object(); - - jsonWriter.key("id"); jsonWriter.value(null); - jsonWriter.key("name"); jsonWriter.value(null); - jsonWriter.key("guid"); jsonWriter.value(null); - jsonWriter.key("type"); jsonWriter.array(); jsonWriter.endArray(); - - jsonWriter.key("key"); - jsonWriter.array(); - jsonWriter.object(); - - jsonWriter.key("namespace"); - jsonWriter.object(); - jsonWriter.key("id"); jsonWriter.value(namespace.id); - jsonWriter.endObject(); - - jsonWriter.key("value"); jsonWriter.value(null); - jsonWriter.key("value|="); - jsonWriter.array(); - for (ReconJob job : jobs) { - jsonWriter.value(((KeyBasedReconJob) job).key); - } - jsonWriter.endArray(); - - jsonWriter.endObject(); - jsonWriter.endArray(); - - jsonWriter.endObject(); - jsonWriter.endArray(); - jsonWriter.endObject(); - - query = stringWriter.toString(); - } - - StringBuffer sb = new StringBuffer(1024); - sb.append(s_mqlreadService); - sb.append("?query="); - sb.append(ParsingUtilities.encode(query)); - - URL url = new URL(sb.toString()); - URLConnection connection = url.openConnection(); - connection.setConnectTimeout(5000); - connection.connect(); - - InputStream is = connection.getInputStream(); - try { - String s = ParsingUtilities.inputStreamToString(is); - JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); - JSONArray results = o.getJSONArray("result"); - int count = 
results.length(); - - for (int i = 0; i < count; i++) { - JSONObject result = results.getJSONObject(i); - - String key = result.getJSONArray("key").getJSONObject(0).getString("value"); - - JSONArray types = result.getJSONArray("type"); - String[] typeIDs = new String[types.length()]; - for (int j = 0; j < typeIDs.length; j++) { - typeIDs[j] = types.getString(j); - } - - ReconCandidate candidate = new ReconCandidate( - result.getString("id"), - result.getString("name"), - typeIDs, - 100 - ); - - Recon recon = Recon.makeFreebaseRecon(historyEntryID); - recon.addCandidate(candidate); - recon.service = "mql"; - recon.judgment = Judgment.Matched; - recon.judgmentAction = "auto"; - recon.match = candidate; - recon.matchRank = 0; - - keyToRecon.put(key, recon); - } - } finally { - is.close(); - } - } catch (Exception e) { - e.printStackTrace(); - } - - for (int i = 0; i < jobs.size(); i++) { - String key = ((KeyBasedReconJob) jobs.get(i)).key; - Recon recon = keyToRecon.get(key); - recons.add(recon); - } - - return recons; - } - -} diff --git a/main/src/com/google/gridworks/model/recon/ReconConfig.java b/main/src/com/google/gridworks/model/recon/ReconConfig.java deleted file mode 100644 index f489522f4..000000000 --- a/main/src/com/google/gridworks/model/recon/ReconConfig.java +++ /dev/null @@ -1,54 +0,0 @@ -package com.google.gridworks.model.recon; - -import java.io.Writer; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.Row; - -abstract public class ReconConfig implements Jsonizable { - static public ReconConfig reconstruct(JSONObject obj) throws Exception { - String mode = obj.getString("mode"); - if ("standard-service".equals(mode) || - "heuristic".equals(mode) // legacy 
- ) { - return StandardReconConfig.reconstruct(obj); - } else if ("strict".equals(mode)) { - return StrictReconConfig.reconstruct(obj); - } else if ("extend".equals(mode)) { - return DataExtensionReconConfig.reconstruct(obj); - } - return null; - } - - abstract public int getBatchSize(); - - abstract public String getBriefDescription(Project project, String columnName); - - abstract public ReconJob createJob( - Project project, - int rowIndex, - Row row, - String columnName, - Cell cell - ); - - abstract public List batchRecon(List jobs, long historyEntryID); - - public void save(Writer writer) { - JSONWriter jsonWriter = new JSONWriter(writer); - try { - write(jsonWriter, new Properties()); - } catch (JSONException e) { - e.printStackTrace(); - } - } -} diff --git a/main/src/com/google/gridworks/model/recon/ReconJob.java b/main/src/com/google/gridworks/model/recon/ReconJob.java deleted file mode 100644 index fc669f166..000000000 --- a/main/src/com/google/gridworks/model/recon/ReconJob.java +++ /dev/null @@ -1,5 +0,0 @@ -package com.google.gridworks.model.recon; - -abstract public class ReconJob { - abstract public int getKey(); -} diff --git a/main/src/com/google/gridworks/model/recon/StandardReconConfig.java b/main/src/com/google/gridworks/model/recon/StandardReconConfig.java deleted file mode 100644 index 48dad2909..000000000 --- a/main/src/com/google/gridworks/model/recon/StandardReconConfig.java +++ /dev/null @@ -1,413 +0,0 @@ -package com.google.gridworks.model.recon; - -import java.io.DataOutputStream; -import java.io.InputStream; -import java.io.StringWriter; -import java.net.URL; -import java.net.URLConnection; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Properties; -import java.util.Set; - -import org.apache.commons.lang.StringUtils; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; -import org.slf4j.Logger; -import 
org.slf4j.LoggerFactory; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.model.RecordModel.RowDependency; -import com.google.gridworks.protograph.FreebaseProperty; -import com.google.gridworks.util.ParsingUtilities; - -public class StandardReconConfig extends ReconConfig { - final static Logger logger = LoggerFactory.getLogger("gridworks-standard-recon"); - - static public class ColumnDetail { - final public String columnName; - final public FreebaseProperty property; - - public ColumnDetail(String columnName, FreebaseProperty property) { - this.columnName = columnName; - this.property = property; - } - } - - static public ReconConfig reconstruct(JSONObject obj) throws Exception { - List columnDetails = null; - if (obj.has("columnDetails")) { - JSONArray columnDetailsA = obj.getJSONArray("columnDetails"); - int l = columnDetailsA.length(); - - columnDetails = new ArrayList(l); - for (int i = 0; i < l; i++) { - JSONObject o = columnDetailsA.getJSONObject(i); - JSONObject p = o.getJSONObject("property"); - - columnDetails.add(new ColumnDetail( - o.getString("column"), - new FreebaseProperty( - p.getString("id"), - p.getString("name") - ) - )); - } - } else { - columnDetails = new ArrayList(); - } - - JSONObject t = obj.has("type") && !obj.isNull("type") ? obj.getJSONObject("type") : null; - - return new StandardReconConfig( - obj.getString("service"), - obj.has("identifierSpace") ? obj.getString("identifierSpace") : null, - obj.has("schemaSpace") ? obj.getString("schemaSpace") : null, - t == null ? null : t.getString("id"), - t == null ? null : (t.has("name") ? 
t.getString("name") : null), - obj.getBoolean("autoMatch"), - columnDetails - ); - } - - static protected class StandardReconJob extends ReconJob { - String text; - String code; - - public int getKey() { - return code.hashCode(); - } - } - - final public String service; - final public String identifierSpace; - final public String schemaSpace; - - final public String typeID; - final public String typeName; - final public boolean autoMatch; - final public List columnDetails; - - public StandardReconConfig( - String service, - String identifierSpace, - String schemaSpace, - - String typeID, - String typeName, - boolean autoMatch, - List columnDetails - ) { - this.service = service; - this.identifierSpace = identifierSpace; - this.schemaSpace = schemaSpace; - - this.typeID = typeID; - this.typeName = typeName; - this.autoMatch = autoMatch; - this.columnDetails = columnDetails; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("mode"); writer.value("standard-service"); - writer.key("service"); writer.value(service); - writer.key("identifierSpace"); writer.value(identifierSpace); - writer.key("schemaSpace"); writer.value(schemaSpace); - writer.key("type"); - if (typeID == null) { - writer.value(null); - } else { - writer.object(); - writer.key("id"); writer.value(typeID); - writer.key("name"); writer.value(typeName); - writer.endObject(); - } - writer.key("autoMatch"); writer.value(autoMatch); - writer.key("columnDetails"); - writer.array(); - for (ColumnDetail c : columnDetails) { - writer.object(); - writer.key("column"); writer.value(c.columnName); - writer.key("property"); c.property.write(writer, options); - writer.endObject(); - } - writer.endArray(); - writer.endObject(); - } - - @Override - public int getBatchSize() { - return 7; - } - - @Override - public String getBriefDescription(Project project, String columnName) { - return "Reconcile cells in column " + columnName + " to type " + 
typeID; - } - - @Override - public ReconJob createJob(Project project, int rowIndex, Row row, - String columnName, Cell cell) { - - StandardReconJob job = new StandardReconJob(); - - try { - StringWriter stringWriter = new StringWriter(); - JSONWriter jsonWriter = new JSONWriter(stringWriter); - - jsonWriter.object(); - jsonWriter.key("query"); jsonWriter.value(cell.value.toString()); - if (typeID != null) { - jsonWriter.key("type"); jsonWriter.value(typeID); - } - - if (columnDetails.size() > 0) { - jsonWriter.key("properties"); - jsonWriter.array(); - - for (ColumnDetail c : columnDetails) { - int detailCellIndex = project.columnModel.getColumnByName(c.columnName).getCellIndex(); - - Cell cell2 = row.getCell(detailCellIndex); - if (cell2 == null || !ExpressionUtils.isNonBlankData(cell2.value)) { - int cellIndex = project.columnModel.getColumnByName(columnName).getCellIndex(); - - RowDependency rd = project.recordModel.getRowDependency(rowIndex); - if (rd != null && rd.cellDependencies != null) { - int contextRowIndex = rd.cellDependencies[cellIndex].rowIndex; - if (contextRowIndex >= 0 && contextRowIndex < project.rows.size()) { - Row row2 = project.rows.get(contextRowIndex); - - cell2 = row2.getCell(detailCellIndex); - } - } - } - - if (cell2 != null && ExpressionUtils.isNonBlankData(cell2.value)) { - jsonWriter.object(); - - jsonWriter.key("pid"); jsonWriter.value(c.property.id); - jsonWriter.key("v"); - if (cell2.recon != null && cell2.recon.match != null) { - jsonWriter.object(); - jsonWriter.key("id"); jsonWriter.value(cell2.recon.match.id); - jsonWriter.key("name"); jsonWriter.value(cell2.recon.match.name); - jsonWriter.endObject(); - } else { - jsonWriter.value(cell2.value.toString()); - } - - jsonWriter.endObject(); - } - } - - jsonWriter.endArray(); - } - jsonWriter.endObject(); - - job.text = cell.value.toString(); - job.code = stringWriter.toString(); - } catch (JSONException e) { - // - } - return job; - } - - @Override - public List batchRecon(List 
jobs, long historyEntryID) { - List recons = new ArrayList(jobs.size()); - - StringWriter stringWriter = new StringWriter(); - - stringWriter.write("{"); - for (int i = 0; i < jobs.size(); i++) { - StandardReconJob job = (StandardReconJob) jobs.get(i); - if (i > 0) { - stringWriter.write(","); - } - stringWriter.write("\"q" + i + "\":"); - stringWriter.write(job.code); - } - stringWriter.write("}"); - String queriesString = stringWriter.toString(); - - try { - URL url = new URL(service); - URLConnection connection = url.openConnection(); - { - connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); - connection.setConnectTimeout(30000); - connection.setDoOutput(true); - - DataOutputStream dos = new DataOutputStream(connection.getOutputStream()); - try { - String body = "queries=" + ParsingUtilities.encode(queriesString); - - dos.writeBytes(body); - } finally { - dos.flush(); - dos.close(); - } - - connection.connect(); - } - - InputStream is = connection.getInputStream(); - try { - String s = ParsingUtilities.inputStreamToString(is); - JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); - - for (int i = 0; i < jobs.size(); i++) { - StandardReconJob job = (StandardReconJob) jobs.get(i); - Recon recon = null; - - String text = job.text; - String key = "q" + i; - if (o.has(key)) { - JSONObject o2 = o.getJSONObject(key); - if (o2.has("result")) { - JSONArray results = o2.getJSONArray("result"); - - recon = createReconServiceResults(text, results, historyEntryID); - } - } - - if (recon == null) { - recon = new Recon(historyEntryID, identifierSpace, schemaSpace); - } - recon.service = service; - - recons.add(recon); - } - } finally { - is.close(); - } - } catch (Exception e) { - logger.error("Failed to batch recon with load:\n" + queriesString, e); - } - - while (recons.size() < jobs.size()) { - Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace); - recon.service = service; - recon.identifierSpace = 
identifierSpace; - recon.schemaSpace = schemaSpace; - - recons.add(recon); - } - - return recons; - } - - protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) { - Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace); - try { - int length = results.length(); - int count = 0; - for (int i = 0; i < length && count < 3; i++) { - JSONObject result = results.getJSONObject(i); - if (!result.has("name")) { - continue; - } - - JSONArray types = result.getJSONArray("type"); - String[] typeIDs = new String[types.length()]; - for (int j = 0; j < typeIDs.length; j++) { - Object type = types.get(j); - typeIDs[j] = type instanceof String ? (String) type : - ((JSONObject) type).getString("id"); - } - - double score = result.getDouble("score"); - ReconCandidate candidate = new ReconCandidate( - result.getString("id"), - result.getString("name"), - typeIDs, - score - ); - - if (i == 0 && result.has("match") && result.getBoolean("match")) { - recon.match = candidate; - recon.matchRank = 0; - recon.judgment = Judgment.Matched; - recon.judgmentAction = "auto"; - } - - recon.addCandidate(candidate); - count++; - } - - if (count > 0) { - ReconCandidate candidate = recon.candidates.get(0); - - recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.name)); - recon.setFeature(Recon.Feature_nameLevenshtein, StringUtils.getLevenshteinDistance(text, candidate.name)); - recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, candidate.name)); - - recon.setFeature(Recon.Feature_typeMatch, false); - if (this.typeID != null) { - for (String typeID : candidate.types) { - if (this.typeID.equals(typeID)) { - recon.setFeature(Recon.Feature_typeMatch, true); - break; - } - } - } - } - } catch (JSONException e) { - e.printStackTrace(); - } - return recon; - } - - static protected double wordDistance(String s1, String s2) { - Set words1 = breakWords(s1); - Set words2 = breakWords(s2); - return words1.size() >= 
words2.size() ? wordDistance(words1, words2) : wordDistance(words2, words1); - } - - static protected double wordDistance(Set longWords, Set shortWords) { - double common = 0; - for (String word : shortWords) { - if (longWords.contains(word)) { - common++; - } - } - return common / longWords.size(); - } - - static final protected Set s_stopWords = new HashSet(); - static { - s_stopWords.add("the"); - s_stopWords.add("a"); - s_stopWords.add("and"); - s_stopWords.add("of"); - s_stopWords.add("on"); - s_stopWords.add("in"); - s_stopWords.add("at"); - s_stopWords.add("by"); - } - - static protected Set breakWords(String s) { - String[] words = s.toLowerCase().split("\\s+"); - - Set set = new HashSet(words.length); - for (String word : words) { - if (!s_stopWords.contains(word)) { - set.add(word); - } - } - return set; - } -} diff --git a/main/src/com/google/gridworks/model/recon/StrictReconConfig.java b/main/src/com/google/gridworks/model/recon/StrictReconConfig.java deleted file mode 100644 index 6ffa4c386..000000000 --- a/main/src/com/google/gridworks/model/recon/StrictReconConfig.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.google.gridworks.model.recon; - -import org.json.JSONObject; - -abstract public class StrictReconConfig extends ReconConfig { - final static protected String s_mqlreadService = "http://api.freebase.com/api/service/mqlread"; - - static public ReconConfig reconstruct(JSONObject obj) throws Exception { - String match = obj.getString("match"); - if ("key".equals(match)) { - return KeyBasedReconConfig.reconstruct(obj); - } else if ("id".equals(match)) { - return IdBasedReconConfig.reconstruct(obj); - } else if ("guid".equals(match)) { - return GuidBasedReconConfig.reconstruct(obj); - } - return null; - } -} diff --git a/main/src/com/google/gridworks/oauth/Credentials.java b/main/src/com/google/gridworks/oauth/Credentials.java deleted file mode 100644 index ba105b95b..000000000 --- a/main/src/com/google/gridworks/oauth/Credentials.java +++ 
/dev/null @@ -1,82 +0,0 @@ -package com.google.gridworks.oauth; - -import javax.servlet.http.Cookie; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import oauth.signpost.OAuth; -import oauth.signpost.http.HttpParameters; - -import com.google.gridworks.util.CookiesUtilities; - -public class Credentials { - - private static final String TOKEN = "oauth_token"; - private static final String SECRET = "oauth_token_secret"; - - public enum Type { - REQUEST("request"), - ACCESS("access"); - - private final String postfix; - - Type(String postfix) { - this.postfix = postfix; - } - - public String getCookieName(Provider provider) { - if (provider == null) throw new RuntimeException("Provider can't be null"); - return provider.getHost() + "_" + postfix; - } - }; - - public static Credentials getCredentials(HttpServletRequest request, Provider provider, Type type) { - Cookie cookie = CookiesUtilities.getCookie(request, type.getCookieName(provider)); - return (cookie == null) ? 
null : makeCredentials(cookie.getValue(), provider); - } - - public static void setCredentials(HttpServletRequest request, HttpServletResponse response, Credentials credentials, Type type, int max_age) { - String name = type.getCookieName(credentials.getProvider()); - String value = credentials.toString(); - CookiesUtilities.setCookie(request, response, name, value, max_age); - } - - public static void deleteCredentials(HttpServletRequest request, HttpServletResponse response, Provider provider, Type type) { - CookiesUtilities.deleteCookie(request, response, type.getCookieName(provider)); - } - - public static Credentials makeCredentials(String str, Provider provider) { - HttpParameters p = OAuth.decodeForm(str); - return new Credentials(p.getFirst(TOKEN), p.getFirst(SECRET), provider); - } - - private Provider provider; - private String token; - private String secret; - - public Credentials(String token, String secret, Provider provider) { - this.token = token; - if (token == null) throw new RuntimeException("Could not find " + TOKEN + " in auth credentials"); - this.secret = secret; - if (secret == null) throw new RuntimeException("Could not find " + SECRET + " in auth credentials"); - this.provider = provider; - if (provider == null) throw new RuntimeException("Provider can't be null"); - } - - public String getToken() { - return token; - } - - public String getSecret() { - return secret; - } - - public Provider getProvider() { - return provider; - } - - public String toString() { - return TOKEN + "=" + OAuth.percentEncode(token) + "&" + SECRET + "=" + OAuth.percentEncode(secret); - } - -} diff --git a/main/src/com/google/gridworks/oauth/FreebaseProvider.java b/main/src/com/google/gridworks/oauth/FreebaseProvider.java deleted file mode 100644 index 89299a6d5..000000000 --- a/main/src/com/google/gridworks/oauth/FreebaseProvider.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.google.gridworks.oauth; - -public class FreebaseProvider extends Provider { - - public 
FreebaseProvider(String host) { - super(host); - } - - public String getRequestTokenServiceURL() { - return "https://" + host + "/api/oauth/request_token"; - } - - public String getAccessTokenServiceURL() { - return "https://" + host + "/api/oauth/access_token"; - } - - public String getUserAuthorizationURL() { - return "https://" + host + "/signin/app"; - } - -} diff --git a/main/src/com/google/gridworks/oauth/FreebaseTimeCommonsHttpOAuthConsumer.java b/main/src/com/google/gridworks/oauth/FreebaseTimeCommonsHttpOAuthConsumer.java deleted file mode 100644 index eaac7a04c..000000000 --- a/main/src/com/google/gridworks/oauth/FreebaseTimeCommonsHttpOAuthConsumer.java +++ /dev/null @@ -1,69 +0,0 @@ -package com.google.gridworks.oauth; - -import java.io.IOException; - -import oauth.signpost.commonshttp.CommonsHttpOAuthConsumer; - -import org.apache.http.HttpEntity; -import org.apache.http.HttpResponse; -import org.apache.http.client.HttpClient; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.DefaultHttpClient; -import org.apache.http.params.BasicHttpParams; -import org.apache.http.params.HttpConnectionParams; -import org.apache.http.params.HttpParams; -import org.apache.http.util.EntityUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class FreebaseTimeCommonsHttpOAuthConsumer extends CommonsHttpOAuthConsumer { - - final static Logger logger = LoggerFactory.getLogger("oauth"); - - private static final long serialVersionUID = -4139931605235255279L; - - private static final int SOCKET_TIMEOUT = 3000; - private static final int CONNECTION_TIMEOUT = 3000; - - private static final String TIMER_URL = "http://gridworks-gadgets.freebaseapps.com/time"; - - public FreebaseTimeCommonsHttpOAuthConsumer(String consumerKey, String consumerSecret) { - super(consumerKey, consumerSecret); - } - - /** - * It might be that the user's computer's clock is not synchronized enough with the Freebase servers - * and this might result 
in Freebase thinking that it was under a replay attack. - * To avoid this problem we get the timestamp directly from acre that we know is synchronized. - * - * NOTE: this call is potentially vulnerable to a man-in-the-middle (MITM) attack, but the same - * could be said if we used an NTP client. - */ - protected String generateTimestamp() { - - long time = -1; - - try { - HttpParams httpParams = new BasicHttpParams(); - HttpConnectionParams.setSoTimeout(httpParams, SOCKET_TIMEOUT); - HttpConnectionParams.setConnectionTimeout(httpParams, CONNECTION_TIMEOUT); - HttpClient httpClient = new DefaultHttpClient(httpParams); - HttpGet httpget = new HttpGet(TIMER_URL); - HttpResponse response = httpClient.execute(httpget); - HttpEntity entity = response.getEntity(); - if (entity != null) { - time = Long.parseLong(EntityUtils.toString(entity),10); - logger.debug("Got remote timestamp {}", time); - } - } catch (IOException e) { - logger.warn("Error obtaining the synchronized remote timestamp, defaulting to the local one",e); - } - - if (time == -1) { - time = System.currentTimeMillis(); - } - - return Long.toString(time / 1000L); - } - -} diff --git a/main/src/com/google/gridworks/oauth/OAuthUtilities.java b/main/src/com/google/gridworks/oauth/OAuthUtilities.java deleted file mode 100644 index c966c573a..000000000 --- a/main/src/com/google/gridworks/oauth/OAuthUtilities.java +++ /dev/null @@ -1,65 +0,0 @@ -package com.google.gridworks.oauth; - -import java.util.HashMap; -import java.util.Map; - -import javax.servlet.http.HttpServletRequest; - -import oauth.signpost.OAuthConsumer; -import oauth.signpost.OAuthProvider; -import oauth.signpost.commonshttp.CommonsHttpOAuthProvider; -import oauth.signpost.http.HttpParameters; - -import com.google.gridworks.util.FreebaseUtils; - -public class OAuthUtilities { - - static final private Map providers = new HashMap(); - static final private Map infos = new HashMap(); - - static private final String[] FREEBASE_OAUTH_INFO = { 
"#9202a8c04000641f80000000150979b7" , "8ded7babfad2f94f4c77e39bbd6c90f31939999b"}; - - static { - Provider freebase = new FreebaseProvider(FreebaseUtils.FREEBASE_HOST); - providers.put(freebase.getHost(), freebase); - - infos.put(freebase.getHost(), FREEBASE_OAUTH_INFO); - } - - public static Provider getProvider(String name) { - return (name == null) ? null : providers.get(name); - } - - public static Provider getProvider(HttpServletRequest request) { - String path = request.getPathInfo().substring(1); - int slash = path.lastIndexOf('/'); - String provider_str = path.substring(slash + 1); - Provider provider = getProvider(provider_str); - if (provider == null) throw new RuntimeException("Can't find OAuth provider '" + provider_str + "'"); - return provider; - } - - public static OAuthConsumer getConsumer(Provider provider) { - if (provider == null) throw new RuntimeException("Provider can't be null"); - String[] consumer_info = infos.get(provider.getHost()); - if (consumer_info == null) throw new RuntimeException("Can't find secrets for provider '" + provider.getHost() + "'"); - OAuthConsumer oauthConsumer = new FreebaseTimeCommonsHttpOAuthConsumer(consumer_info[0],consumer_info[1]); - HttpParameters params = new HttpParameters(); - params.put("realm", provider.getHost()); - oauthConsumer.setAdditionalParameters(params); - return oauthConsumer; - } - - public static OAuthConsumer getConsumer(Credentials credentials, Provider provider) { - OAuthConsumer consumer = getConsumer(provider); - if (credentials != null) { - consumer.setTokenWithSecret(credentials.getToken(), credentials.getSecret()); - } - return consumer; - } - - public static OAuthProvider getOAuthProvider(Provider p) { - return new CommonsHttpOAuthProvider(p.getRequestTokenServiceURL(), p.getAccessTokenServiceURL(), p.getUserAuthorizationURL()); - } - -} diff --git a/main/src/com/google/gridworks/oauth/Provider.java b/main/src/com/google/gridworks/oauth/Provider.java deleted file mode 100644 index 
13316c44f..000000000 --- a/main/src/com/google/gridworks/oauth/Provider.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.google.gridworks.oauth; - - -public abstract class Provider { - - protected String host; - - public Provider(String host) { - this.host = host; - } - - public String getHost() { - return host; - } - - abstract public String getRequestTokenServiceURL(); - abstract public String getAccessTokenServiceURL(); - abstract public String getUserAuthorizationURL(); -} diff --git a/main/src/com/google/gridworks/operations/EngineDependentMassCellOperation.java b/main/src/com/google/gridworks/operations/EngineDependentMassCellOperation.java deleted file mode 100644 index e0a301c0b..000000000 --- a/main/src/com/google/gridworks/operations/EngineDependentMassCellOperation.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.google.gridworks.operations; - -import java.util.ArrayList; -import java.util.List; - -import org.json.JSONObject; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.model.changes.MassCellChange; - -abstract public class EngineDependentMassCellOperation extends EngineDependentOperation { - final protected String _columnName; - final protected boolean _updateRowContextDependencies; - - protected EngineDependentMassCellOperation( - JSONObject engineConfig, String columnName, boolean updateRowContextDependencies) { - super(engineConfig); - _columnName = columnName; - _updateRowContextDependencies = updateRowContextDependencies; - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - Engine engine = createEngine(project); - - Column 
column = project.columnModel.getColumnByName(_columnName); - if (column == null) { - throw new Exception("No column named " + _columnName); - } - - List cellChanges = new ArrayList(project.rows.size()); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - try { - filteredRows.accept(project, createRowVisitor(project, cellChanges, historyEntryID)); - } catch (Exception e) { - e.printStackTrace(); - } - - String description = createDescription(column, cellChanges); - - return new HistoryEntry( - historyEntryID, project, description, this, createChange(project, column, cellChanges)); - } - - protected Change createChange(Project project, Column column, List cellChanges) { - return new MassCellChange( - cellChanges, column.getName(), _updateRowContextDependencies); - } - - abstract protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception; - abstract protected String createDescription(Column column, List cellChanges); -} diff --git a/main/src/com/google/gridworks/operations/EngineDependentOperation.java b/main/src/com/google/gridworks/operations/EngineDependentOperation.java deleted file mode 100644 index 605538118..000000000 --- a/main/src/com/google/gridworks/operations/EngineDependentOperation.java +++ /dev/null @@ -1,38 +0,0 @@ -package com.google.gridworks.operations; - -import org.json.JSONException; -import org.json.JSONObject; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.util.ParsingUtilities; - -abstract public class EngineDependentOperation extends AbstractOperation { - final private String _engineConfigString; - - transient protected JSONObject _engineConfig; - - protected EngineDependentOperation(JSONObject engineConfig) { - _engineConfig = engineConfig; - _engineConfigString = engineConfig == null || engineConfig.length() == 0 - ? 
null : engineConfig.toString(); - } - - protected Engine createEngine(Project project) throws Exception { - Engine engine = new Engine(project); - engine.initializeFromJSON(getEngineConfig()); - return engine; - } - - protected JSONObject getEngineConfig() { - if (_engineConfig == null && _engineConfigString != null) { - try { - _engineConfig = ParsingUtilities.evaluateJsonStringToObject(_engineConfigString); - } catch (JSONException e) { - // ignore - } - } - return _engineConfig; - } -} diff --git a/main/src/com/google/gridworks/operations/OnError.java b/main/src/com/google/gridworks/operations/OnError.java deleted file mode 100644 index d23142dff..000000000 --- a/main/src/com/google/gridworks/operations/OnError.java +++ /dev/null @@ -1,10 +0,0 @@ -/** - * - */ -package com.google.gridworks.operations; - -public enum OnError { - KeepOriginal, - SetToBlank, - StoreError -} \ No newline at end of file diff --git a/main/src/com/google/gridworks/operations/OperationRegistry.java b/main/src/com/google/gridworks/operations/OperationRegistry.java deleted file mode 100644 index 7a15f45d1..000000000 --- a/main/src/com/google/gridworks/operations/OperationRegistry.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.google.gridworks.operations; - -import java.lang.reflect.Method; -import java.util.HashMap; -import java.util.Map; - -import org.json.JSONObject; - -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; - -import edu.mit.simile.butterfly.ButterflyModule; - -public abstract class OperationRegistry { - - static final public Map> s_opNameToClass = new HashMap>(); - static final public Map, String> s_opClassToName = new HashMap, String>(); - - static public void registerOperation(ButterflyModule module, String name, Class klass) { - String key = module.getName() + "/" + name; - - s_opNameToClass.put(key, klass); - s_opClassToName.put(klass, key); - } - - static public AbstractOperation reconstruct(Project project, JSONObject 
obj) { - try { - String op = obj.getString("op"); - if (!op.contains("/")) { - op = "core/" + op; // backward compatible - } - - Class klass = OperationRegistry.s_opNameToClass.get(op); - if (klass != null) { - Method reconstruct = klass.getMethod("reconstruct", Project.class, JSONObject.class); - if (reconstruct != null) { - return (AbstractOperation) reconstruct.invoke(null, project, obj); - } - } - } catch (Exception e) { - e.printStackTrace(); - } - return null; - } -} diff --git a/main/src/com/google/gridworks/operations/SaveProtographOperation.java b/main/src/com/google/gridworks/operations/SaveProtographOperation.java deleted file mode 100644 index d0927182c..000000000 --- a/main/src/com/google/gridworks/operations/SaveProtographOperation.java +++ /dev/null @@ -1,122 +0,0 @@ -package com.google.gridworks.operations; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.Writer; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.protograph.Protograph; -import com.google.gridworks.util.ParsingUtilities; -import com.google.gridworks.util.Pool; - -public class SaveProtographOperation extends AbstractOperation { - final protected Protograph _protograph; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - return new SaveProtographOperation( - Protograph.reconstruct(obj.getJSONObject("protograph")) - ); - } - - public SaveProtographOperation(Protograph protograph) { - _protograph = protograph; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - 
writer.key("description"); writer.value("Save protograph"); - writer.key("protograph"); _protograph.write(writer, options); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Save schema skeleton"; - } - - @Override - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - String description = "Save schema-alignment protograph"; - - Change change = new ProtographChange(_protograph); - - return new HistoryEntry(historyEntryID, project, description, SaveProtographOperation.this, change); - } - - static public class ProtographChange implements Change { - final protected Protograph _newProtograph; - protected Protograph _oldProtograph; - - public ProtographChange(Protograph protograph) { - _newProtograph = protograph; - } - - public void apply(Project project) { - synchronized (project) { - _oldProtograph = (Protograph) project.overlayModels.get("freebaseProtograph"); - - project.overlayModels.put("freebaseProtograph", _newProtograph); - } - } - - public void revert(Project project) { - synchronized (project) { - if (_oldProtograph == null) { - project.overlayModels.remove("freebaseProtograph"); - } else { - project.overlayModels.put("freebaseProtograph", _oldProtograph); - } - } - } - - public void save(Writer writer, Properties options) throws IOException { - writer.write("newProtograph="); writeProtograph(_newProtograph, writer); writer.write('\n'); - writer.write("oldProtograph="); writeProtograph(_oldProtograph, writer); writer.write('\n'); - writer.write("/ec/\n"); // end of change marker - } - - static public Change load(LineNumberReader reader, Pool pool) throws Exception { - Protograph oldProtograph = null; - Protograph newProtograph = null; - - String line; - while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if 
("oldProtograph".equals(field) && value.length() > 0) { - oldProtograph = Protograph.reconstruct(ParsingUtilities.evaluateJsonStringToObject(value)); - } else if ("newProtograph".equals(field) && value.length() > 0) { - newProtograph = Protograph.reconstruct(ParsingUtilities.evaluateJsonStringToObject(value)); - } - } - - ProtographChange change = new ProtographChange(newProtograph); - change._oldProtograph = oldProtograph; - - return change; - } - - static protected void writeProtograph(Protograph p, Writer writer) throws IOException { - if (p != null) { - JSONWriter jsonWriter = new JSONWriter(writer); - try { - p.write(jsonWriter, new Properties()); - } catch (JSONException e) { - e.printStackTrace(); - } - } - } - } -} diff --git a/main/src/com/google/gridworks/operations/cell/BlankDownOperation.java b/main/src/com/google/gridworks/operations/cell/BlankDownOperation.java deleted file mode 100644 index e4043106b..000000000 --- a/main/src/com/google/gridworks/operations/cell/BlankDownOperation.java +++ /dev/null @@ -1,101 +0,0 @@ -package com.google.gridworks.operations.cell; - -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.operations.EngineDependentMassCellOperation; -import com.google.gridworks.operations.OperationRegistry; - -public class BlankDownOperation extends EngineDependentMassCellOperation { - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - 
- return new BlankDownOperation( - engineConfig, - obj.getString("columnName") - ); - } - - public BlankDownOperation( - JSONObject engineConfig, - String columnName - ) { - super(engineConfig, columnName, true); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("columnName"); writer.value(_columnName); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Blank down cells in column " + _columnName; - } - - protected String createDescription(Column column, - List cellChanges) { - - return "Blank down " + cellChanges.size() + - " cells in column " + column.getName(); - } - - protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - - return new RowVisitor() { - int cellIndex; - List cellChanges; - Cell previousCell; - - public RowVisitor init(int cellIndex, List cellChanges) { - this.cellIndex = cellIndex; - this.cellChanges = cellChanges; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Object value = row.getCellValue(cellIndex); - if (ExpressionUtils.isNonBlankData(value)) { - Cell cell = row.getCell(cellIndex); - if (previousCell != null && cell.value.equals(previousCell.value)) { - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, null); - cellChanges.add(cellChange); - } - previousCell = cell; - } else { - previousCell = null; - } - return false; - } - }.init(column.getCellIndex(), cellChanges); - } -} 
diff --git a/main/src/com/google/gridworks/operations/cell/FillDownOperation.java b/main/src/com/google/gridworks/operations/cell/FillDownOperation.java deleted file mode 100644 index af3eea2e7..000000000 --- a/main/src/com/google/gridworks/operations/cell/FillDownOperation.java +++ /dev/null @@ -1,97 +0,0 @@ -package com.google.gridworks.operations.cell; - -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.operations.EngineDependentMassCellOperation; -import com.google.gridworks.operations.OperationRegistry; - -public class FillDownOperation extends EngineDependentMassCellOperation { - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - - return new FillDownOperation( - engineConfig, - obj.getString("columnName") - ); - } - - public FillDownOperation( - JSONObject engineConfig, - String columnName - ) { - super(engineConfig, columnName, true); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("columnName"); writer.value(_columnName); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Fill down cells in column " + _columnName; 
- } - - protected String createDescription(Column column, - List cellChanges) { - - return "Fill down " + cellChanges.size() + - " cells in column " + column.getName(); - } - - protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - - return new RowVisitor() { - int cellIndex; - List cellChanges; - Cell previousCell; - - public RowVisitor init(int cellIndex, List cellChanges) { - this.cellIndex = cellIndex; - this.cellChanges = cellChanges; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Object value = row.getCellValue(cellIndex); - if (ExpressionUtils.isNonBlankData(value)) { - previousCell = row.getCell(cellIndex); - } else if (previousCell != null) { - CellChange cellChange = new CellChange(rowIndex, cellIndex, row.getCell(cellIndex), previousCell); - cellChanges.add(cellChange); - } - return false; - } - }.init(column.getCellIndex(), cellChanges); - } -} diff --git a/main/src/com/google/gridworks/operations/cell/MassEditOperation.java b/main/src/com/google/gridworks/operations/cell/MassEditOperation.java deleted file mode 100644 index 81e21c9be..000000000 --- a/main/src/com/google/gridworks/operations/cell/MassEditOperation.java +++ /dev/null @@ -1,242 +0,0 @@ -package com.google.gridworks.operations.cell; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.Evaluable; -import 
com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.operations.EngineDependentMassCellOperation; -import com.google.gridworks.operations.OperationRegistry; -import com.google.gridworks.util.ParsingUtilities; - -public class MassEditOperation extends EngineDependentMassCellOperation { - final protected String _expression; - final protected List _edits; - - static public class Edit implements Jsonizable { - final public List from; - final public boolean fromBlank; - final public boolean fromError; - final public Serializable to; - - public Edit(List from, boolean fromBlank, boolean fromError, Serializable to) { - this.from = from; - this.fromBlank = fromBlank; - this.fromError = fromError; - this.to = to; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("fromBlank"); writer.value(fromBlank); - writer.key("fromError"); writer.value(fromError); - writer.key("from"); - writer.array(); - for (String s : from) { - writer.value(s); - } - writer.endArray(); - writer.key("to"); writer.value(to); - writer.endObject(); - } - } - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.has("engineConfig") && !obj.isNull("engineConfig") ? 
- obj.getJSONObject("engineConfig") : null; - - return new MassEditOperation( - engineConfig, - obj.getString("columnName"), - obj.getString("expression"), - reconstructEdits(obj.getJSONArray("edits")) - ); - } - - static public List reconstructEdits(JSONArray editsA) throws Exception { - int editCount = editsA.length(); - - List edits = new ArrayList(editCount); - for (int i = 0; i < editCount; i++) { - JSONObject editO = editsA.getJSONObject(i); - - List from = null; - if (editO.has("from") && !editO.isNull("from")) { - JSONArray fromA = editO.getJSONArray("from"); - int fromCount = fromA.length(); - - from = new ArrayList(fromCount); - for (int j = 0; j < fromCount; j++) { - from.add(fromA.getString(j)); - } - } else { - from = new ArrayList(); - } - - boolean fromBlank = editO.has("fromBlank") && editO.getBoolean("fromBlank"); - boolean fromError = editO.has("fromError") && editO.getBoolean("fromError"); - - Serializable to = (Serializable) editO.get("to"); - if (editO.has("type")) { - String type = editO.getString("type"); - if ("date".equals(type)) { - to = ParsingUtilities.stringToDate((String) to); - } - } - - edits.add(new Edit(from, fromBlank, fromError, to)); - } - - return edits; - } - - public MassEditOperation(JSONObject engineConfig, String columnName, String expression, List edits) { - super(engineConfig, columnName, true); - _expression = expression; - _edits = edits; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("columnName"); writer.value(_columnName); - writer.key("expression"); writer.value(_expression); - writer.key("edits"); - writer.array(); - for (Edit edit : _edits) { - edit.write(writer, options); - } - writer.endArray(); - writer.endObject(); 
- } - - protected String getBriefDescription(Project project) { - return "Mass edit cells in column " + _columnName; - } - - protected String createDescription(Column column, - List cellChanges) { - - return "Mass edit " + cellChanges.size() + - " cells in column " + column.getName(); - } - - protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - - Evaluable eval = MetaParser.parse(_expression); - Properties bindings = ExpressionUtils.createBindings(project); - - Map fromTo = new HashMap(); - Serializable fromBlankTo = null; - Serializable fromErrorTo = null; - - for (Edit edit : _edits) { - for (String s : edit.from) { - fromTo.put(s, edit.to); - } - - // the last edit wins - if (edit.fromBlank) { - fromBlankTo = edit.to; - } - if (edit.fromError) { - fromErrorTo = edit.to; - } - } - - return new RowVisitor() { - int cellIndex; - Properties bindings; - List cellChanges; - Evaluable eval; - - Map fromTo; - Serializable fromBlankTo; - Serializable fromErrorTo; - - public RowVisitor init( - int cellIndex, - Properties bindings, - List cellChanges, - Evaluable eval, - Map fromTo, - Serializable fromBlankTo, - Serializable fromErrorTo - ) { - this.cellIndex = cellIndex; - this.bindings = bindings; - this.cellChanges = cellChanges; - this.eval = eval; - this.fromTo = fromTo; - this.fromBlankTo = fromBlankTo; - this.fromErrorTo = fromErrorTo; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(cellIndex); - Cell newCell = null; - - ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); - - Object v = eval.evaluate(bindings); - if (ExpressionUtils.isError(v)) { - if (fromErrorTo != null) { - newCell = new Cell(fromErrorTo, (cell 
!= null) ? cell.recon : null); - } - } else if (ExpressionUtils.isNonBlankData(v)) { - String from = v.toString(); - Serializable to = fromTo.get(from); - if (to != null) { - newCell = new Cell(to, (cell != null) ? cell.recon : null); - } - } else { - if (fromBlankTo != null) { - newCell = new Cell(fromBlankTo, (cell != null) ? cell.recon : null); - } - } - - if (newCell != null) { - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); - cellChanges.add(cellChange); - } - return false; - } - }.init(column.getCellIndex(), bindings, cellChanges, eval, fromTo, fromBlankTo, fromErrorTo); - } -} diff --git a/main/src/com/google/gridworks/operations/cell/MultiValuedCellJoinOperation.java b/main/src/com/google/gridworks/operations/cell/MultiValuedCellJoinOperation.java deleted file mode 100644 index 4121d4ef9..000000000 --- a/main/src/com/google/gridworks/operations/cell/MultiValuedCellJoinOperation.java +++ /dev/null @@ -1,130 +0,0 @@ -package com.google.gridworks.operations.cell; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.MassRowChange; -import com.google.gridworks.operations.OperationRegistry; - -public class MultiValuedCellJoinOperation extends AbstractOperation { - final protected String _columnName; - final protected String _keyColumnName; - final protected String _separator; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - return new MultiValuedCellJoinOperation( - obj.getString("columnName"), - 
obj.getString("keyColumnName"), - obj.getString("separator") - ); - } - - public MultiValuedCellJoinOperation( - String columnName, - String keyColumnName, - String separator - ) { - _columnName = columnName; - _keyColumnName = keyColumnName; - _separator = separator; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("columnName"); writer.value(_columnName); - writer.key("keyColumnName"); writer.value(_keyColumnName); - writer.key("separator"); writer.value(_separator); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Join multi-valued cells in column " + _columnName; - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - if (column == null) { - throw new Exception("No column named " + _columnName); - } - int cellIndex = column.getCellIndex(); - - Column keyColumn = project.columnModel.getColumnByName(_keyColumnName); - if (keyColumn == null) { - throw new Exception("No key column named " + _keyColumnName); - } - int keyCellIndex = keyColumn.getCellIndex(); - - List newRows = new ArrayList(); - - int oldRowCount = project.rows.size(); - for (int r = 0; r < oldRowCount; r++) { - Row oldRow = project.rows.get(r); - - if (oldRow.isCellBlank(keyCellIndex)) { - newRows.add(oldRow.dup()); - continue; - } - - int r2 = r + 1; - while (r2 < oldRowCount && project.rows.get(r2).isCellBlank(keyCellIndex)) { - r2++; - } - - if (r2 == r + 1) { - newRows.add(oldRow.dup()); - continue; - } - - StringBuffer sb = new StringBuffer(); - for (int r3 = r; r3 < r2; r3++) { - Object value = project.rows.get(r3).getCellValue(cellIndex); - if (ExpressionUtils.isNonBlankData(value)) { - if 
(sb.length() > 0) { - sb.append(_separator); - } - sb.append(value.toString()); - } - } - - for (int r3 = r; r3 < r2; r3++) { - Row newRow = project.rows.get(r3).dup(); - if (r3 == r) { - newRow.setCell(cellIndex, new Cell(sb.toString(), null)); - } else { - newRow.setCell(cellIndex, null); - } - - if (!newRow.isEmpty()) { - newRows.add(newRow); - } - } - - r = r2 - 1; // r will be incremented by the for loop anyway - } - - return new HistoryEntry( - historyEntryID, - project, - getBriefDescription(null), - this, - new MassRowChange(newRows) - ); - } - -} diff --git a/main/src/com/google/gridworks/operations/cell/MultiValuedCellSplitOperation.java b/main/src/com/google/gridworks/operations/cell/MultiValuedCellSplitOperation.java deleted file mode 100644 index d5ad1f86f..000000000 --- a/main/src/com/google/gridworks/operations/cell/MultiValuedCellSplitOperation.java +++ /dev/null @@ -1,147 +0,0 @@ -package com.google.gridworks.operations.cell; - - import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.apache.commons.lang.StringUtils; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.MassRowChange; -import com.google.gridworks.operations.OperationRegistry; - -public class MultiValuedCellSplitOperation extends AbstractOperation { - final protected String _columnName; - final protected String _keyColumnName; - final protected String _separator; - final protected String _mode; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - return new MultiValuedCellSplitOperation( - obj.getString("columnName"), - 
obj.getString("keyColumnName"), - obj.getString("separator"), - obj.getString("mode") - ); - } - - public MultiValuedCellSplitOperation( - String columnName, - String keyColumnName, - String separator, - String mode - ) { - _columnName = columnName; - _keyColumnName = keyColumnName; - _separator = separator; - _mode = mode; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value("Split multi-valued cells in column " + _columnName); - writer.key("columnName"); writer.value(_columnName); - writer.key("keyColumnName"); writer.value(_keyColumnName); - writer.key("separator"); writer.value(_separator); - writer.key("mode"); writer.value(_mode); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Split multi-valued cells in column " + _columnName; - } - - @Override - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - if (column == null) { - throw new Exception("No column named " + _columnName); - } - int cellIndex = column.getCellIndex(); - - Column keyColumn = project.columnModel.getColumnByName(_keyColumnName); - if (keyColumn == null) { - throw new Exception("No key column named " + _keyColumnName); - } - int keyCellIndex = keyColumn.getCellIndex(); - - List newRows = new ArrayList(); - - int oldRowCount = project.rows.size(); - for (int r = 0; r < oldRowCount; r++) { - Row oldRow = project.rows.get(r); - if (oldRow.isCellBlank(cellIndex)) { - newRows.add(oldRow.dup()); - continue; - } - - Object value = oldRow.getCellValue(cellIndex); - String s = value instanceof String ? 
((String) value) : value.toString(); - String[] values = null; - if (_mode.equals("regex")) { - values = s.split(_separator); - } else { - values = StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator); - } - - if (values.length < 2) { - newRows.add(oldRow.dup()); - continue; - } - - // First value goes into the same row - { - Row firstNewRow = oldRow.dup(); - firstNewRow.setCell(cellIndex, new Cell(values[0].trim(), null)); - - newRows.add(firstNewRow); - } - - int r2 = r + 1; - for (int v = 1; v < values.length; v++) { - Cell newCell = new Cell(values[v].trim(), null); - - if (r2 < project.rows.size()) { - Row oldRow2 = project.rows.get(r2); - if (oldRow2.isCellBlank(cellIndex) && - oldRow2.isCellBlank(keyCellIndex)) { - - Row newRow = oldRow2.dup(); - newRow.setCell(cellIndex, newCell); - - newRows.add(newRow); - r2++; - - continue; - } - } - - Row newRow = new Row(cellIndex + 1); - newRow.setCell(cellIndex, newCell); - - newRows.add(newRow); - } - - r = r2 - 1; // r will be incremented by the for loop anyway - } - - return new HistoryEntry( - historyEntryID, - project, - getBriefDescription(null), - this, - new MassRowChange(newRows) - ); - } -} diff --git a/main/src/com/google/gridworks/operations/cell/TextTransformOperation.java b/main/src/com/google/gridworks/operations/cell/TextTransformOperation.java deleted file mode 100644 index e2401e8a0..000000000 --- a/main/src/com/google/gridworks/operations/cell/TextTransformOperation.java +++ /dev/null @@ -1,194 +0,0 @@ -package com.google.gridworks.operations.cell; - -import java.io.Serializable; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.WrappedCell; -import 
com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.operations.EngineDependentMassCellOperation; -import com.google.gridworks.operations.OnError; -import com.google.gridworks.operations.OperationRegistry; - -public class TextTransformOperation extends EngineDependentMassCellOperation { - final protected String _expression; - final protected OnError _onError; - final protected boolean _repeat; - final protected int _repeatCount; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - - return new TextTransformOperation( - engineConfig, - obj.getString("columnName"), - obj.getString("expression"), - stringToOnError(obj.getString("onError")), - obj.getBoolean("repeat"), - obj.getInt("repeatCount") - ); - } - - static public OnError stringToOnError(String s) { - if ("set-to-blank".equalsIgnoreCase(s)) { - return OnError.SetToBlank; - } else if ("store-error".equalsIgnoreCase(s)) { - return OnError.StoreError; - } else { - return OnError.KeepOriginal; - } - } - static public String onErrorToString(OnError onError) { - if (onError == OnError.SetToBlank) { - return "set-to-blank"; - } else if (onError == OnError.StoreError) { - return "store-error"; - } else { - return "keep-original"; - } - } - - public TextTransformOperation( - JSONObject engineConfig, - String columnName, - String expression, - OnError onError, - boolean repeat, - int repeatCount - ) { - super(engineConfig, columnName, true); - _expression = expression; - _onError = onError; - _repeat = repeat; - _repeatCount = repeatCount; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); 
writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("columnName"); writer.value(_columnName); - writer.key("expression"); writer.value(_expression); - writer.key("onError"); writer.value(onErrorToString(_onError)); - writer.key("repeat"); writer.value(_repeat); - writer.key("repeatCount"); writer.value(_repeatCount); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Text transform on cells in column " + _columnName + " using expression " + _expression; - } - - protected String createDescription(Column column, - List cellChanges) { - - return "Text transform on " + cellChanges.size() + - " cells in column " + column.getName() + ": " + _expression; - } - - protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - - Evaluable eval = MetaParser.parse(_expression); - Properties bindings = ExpressionUtils.createBindings(project); - - return new RowVisitor() { - int cellIndex; - Properties bindings; - List cellChanges; - Evaluable eval; - - public RowVisitor init(int cellIndex, Properties bindings, List cellChanges, Evaluable eval) { - this.cellIndex = cellIndex; - this.bindings = bindings; - this.cellChanges = cellChanges; - this.eval = eval; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(cellIndex); - Cell newCell = null; - - Object oldValue = cell != null ? 
cell.value : null; - - ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); - - Object o = eval.evaluate(bindings); - if (o == null) { - if (oldValue != null) { - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, null); - cellChanges.add(cellChange); - } - } else { - if (o instanceof Cell) { - newCell = (Cell) o; - } else if (o instanceof WrappedCell) { - newCell = ((WrappedCell) o).cell; - } else { - Serializable newValue = ExpressionUtils.wrapStorable(o); - if (ExpressionUtils.isError(newValue)) { - if (_onError == OnError.KeepOriginal) { - return false; - } else if (_onError == OnError.SetToBlank) { - newValue = null; - } - } - - if (!ExpressionUtils.sameValue(oldValue, newValue)) { - newCell = new Cell(newValue, (cell != null) ? cell.recon : null); - - if (_repeat) { - for (int i = 0; i < _repeatCount; i++) { - ExpressionUtils.bind(bindings, row, rowIndex, _columnName, newCell); - - newValue = ExpressionUtils.wrapStorable(eval.evaluate(bindings)); - if (ExpressionUtils.isError(newValue)) { - break; - } else if (ExpressionUtils.sameValue(newCell.value, newValue)) { - break; - } - - newCell = new Cell(newValue, newCell.recon); - } - } - } - } - - if (newCell != null) { - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); - cellChanges.add(cellChange); - } - } - - return false; - } - }.init(column.getCellIndex(), bindings, cellChanges, eval); - } -} diff --git a/main/src/com/google/gridworks/operations/cell/TransposeColumnsIntoRowsOperation.java b/main/src/com/google/gridworks/operations/cell/TransposeColumnsIntoRowsOperation.java deleted file mode 100644 index 0612cf624..000000000 --- a/main/src/com/google/gridworks/operations/cell/TransposeColumnsIntoRowsOperation.java +++ /dev/null @@ -1,177 +0,0 @@ -package com.google.gridworks.operations.cell; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import 
org.json.JSONWriter; - -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.MassRowColumnChange; -import com.google.gridworks.operations.OperationRegistry; - -public class TransposeColumnsIntoRowsOperation extends AbstractOperation { - final protected String _startColumnName; - final protected int _columnCount; - final protected String _combinedColumnName; - final protected boolean _prependColumnName; - final protected String _separator; - final protected boolean _ignoreBlankCells; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - return new TransposeColumnsIntoRowsOperation( - obj.getString("startColumnName"), - obj.getInt("columnCount"), - obj.getString("combinedColumnName"), - obj.getBoolean("prependColumnName"), - obj.getString("separator"), - obj.getBoolean("ignoreBlankCells") - ); - } - - public TransposeColumnsIntoRowsOperation( - String startColumnName, - int columnCount, - String combinedColumnName, - boolean prependColumnName, - String separator, - boolean ignoreBlankCells - ) { - _startColumnName = startColumnName; - _columnCount = columnCount; - _combinedColumnName = combinedColumnName; - _prependColumnName = prependColumnName; - _separator = separator; - _ignoreBlankCells = ignoreBlankCells; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value("Transpose cells in " + _columnCount + " column(s) starting with " + _startColumnName + " into rows"); - writer.key("startColumnName"); writer.value(_startColumnName); - writer.key("columnCount"); 
writer.value(_columnCount); - writer.key("combinedColumnName"); writer.value(_combinedColumnName); - writer.key("prependColumnName"); writer.value(_prependColumnName); - writer.key("separator"); writer.value(_separator); - writer.key("ignoreBlankCells"); writer.value(_ignoreBlankCells); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Transpose cells in " + _columnCount + " column(s) starting with " + _startColumnName + " into rows"; - } - - @Override - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - if (_combinedColumnName != null && - !_combinedColumnName.isEmpty() && - project.columnModel.getColumnByName(_combinedColumnName) != null) { - throw new Exception("Another column already named " + _combinedColumnName); - } - - List newColumns = new ArrayList(); - List oldColumns = project.columnModel.columns; - - int columnsLeftToTranspose = _columnCount; - int startColumnIndex = oldColumns.size(); - for (int c = 0; c < oldColumns.size(); c++) { - Column column = oldColumns.get(c); - if (columnsLeftToTranspose == 0) { - // This column is beyond the columns to transpose - - Column newColumn = new Column(newColumns.size(), column.getOriginalHeaderLabel()); - newColumn.setName(column.getName()); - - newColumns.add(newColumn); - } else if (columnsLeftToTranspose < _columnCount) { - // This column is a column to transpose, but not the first - // nothing to do - - columnsLeftToTranspose--; - } else if (_startColumnName.equals(column.getName())) { - // This is the first column to transpose - - startColumnIndex = c; - - String columnName = _combinedColumnName != null && _combinedColumnName.length() > 0 ? 
_combinedColumnName : column.getName(); - Column newColumn = new Column(newColumns.size(), columnName); - - newColumns.add(newColumn); - - columnsLeftToTranspose--; - } else { - // This column is before all columns to transpose - - Column newColumn = new Column(newColumns.size(), column.getOriginalHeaderLabel()); - newColumn.setName(column.getName()); - - newColumns.add(newColumn); - } - } - - - List oldRows = project.rows; - List newRows = new ArrayList(oldRows.size() * _columnCount); - for (int r = 0; r < oldRows.size(); r++) { - Row oldRow = project.rows.get(r); - Row firstNewRow = new Row(newColumns.size()); - - newRows.add(firstNewRow); - - int transposedCells = 0; - for (int c = 0; c < oldColumns.size(); c++) { - Column column = oldColumns.get(c); - Cell cell = oldRow.getCell(column.getCellIndex()); - - if (c < startColumnIndex) { - firstNewRow.setCell(c, cell); - } else if (c == startColumnIndex || c < startColumnIndex + _columnCount) { - Cell newCell; - - if (cell == null || cell.value == null) { - if (_prependColumnName && !_ignoreBlankCells) { - newCell = new Cell(column.getName() + _separator, null); - } else { - continue; - } - } else if (_prependColumnName) { - newCell = new Cell(column.getName() + _separator + cell.value, null); - } else { - newCell = cell; - } - - if (transposedCells == 0) { - firstNewRow.setCell(startColumnIndex, newCell); - } else { - Row newRow = new Row(newColumns.size()); - - newRow.setCell(startColumnIndex, newCell); - newRows.add(newRow); - } - - transposedCells++; - } else { - firstNewRow.setCell(c - _columnCount + 1, cell); - } - } - } - - return new HistoryEntry( - historyEntryID, - project, - getBriefDescription(null), - this, - new MassRowColumnChange(newColumns, newRows) - ); - } -} diff --git a/main/src/com/google/gridworks/operations/cell/TransposeRowsIntoColumnsOperation.java b/main/src/com/google/gridworks/operations/cell/TransposeRowsIntoColumnsOperation.java deleted file mode 100644 index 1a3a4f4c1..000000000 --- 
a/main/src/com/google/gridworks/operations/cell/TransposeRowsIntoColumnsOperation.java +++ /dev/null @@ -1,125 +0,0 @@ -package com.google.gridworks.operations.cell; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.MassRowColumnChange; -import com.google.gridworks.operations.OperationRegistry; - -public class TransposeRowsIntoColumnsOperation extends AbstractOperation { - final protected String _columnName; - final protected int _rowCount; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - return new TransposeRowsIntoColumnsOperation( - obj.getString("columnName"), - obj.getInt("rowCount") - ); - } - - public TransposeRowsIntoColumnsOperation( - String columnName, - int rowCount - ) { - _columnName = columnName; - _rowCount = rowCount; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value("Transpose every " + _rowCount + " cells in column " + _columnName + " into separate columns"); - writer.key("columnName"); writer.value(_columnName); - writer.key("rowCount"); writer.value(_rowCount); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Transpose every " + _rowCount + " cells in column " + _columnName + " into separate columns"; - } - - @Override - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - 
List newColumns = new ArrayList(); - List oldColumns = project.columnModel.columns; - - int columnIndex = project.columnModel.getColumnIndexByName(_columnName); - int columnCount = oldColumns.size(); - - newColumns.addAll(oldColumns.subList(0, columnIndex)); - - for (int i = 0; i < columnCount; i++) { - Column column = oldColumns.get(i); - - if (i == columnIndex) { - int newIndex = 1; - for (int n = 0; n < _rowCount; n++) { - String columnName = _columnName + " " + newIndex++; - while (project.columnModel.getColumnByName(columnName) != null) { - columnName = _columnName + " " + newIndex++; - } - - newColumns.add(new Column(i + n, columnName)); - } - } else if (i < columnIndex) { - newColumns.add(new Column(i, column.getName())); - } else { - newColumns.add(new Column(i + _rowCount - 1, column.getName())); - } - } - - List oldRows = project.rows; - List newRows = new ArrayList(oldRows.size() / _rowCount); - for (int r = 0; r < oldRows.size(); r += _rowCount) { - Row firstNewRow = new Row(newColumns.size()); - - for (int r2 = 0; r2 < _rowCount && r + r2 < oldRows.size(); r2++) { - Row oldRow = oldRows.get(r + r2); - Row newRow = r2 == 0 ? 
firstNewRow : new Row(newColumns.size()); - boolean hasData = r2 == 0; - - for (int c = 0; c < oldColumns.size(); c++) { - Column column = oldColumns.get(c); - Cell cell = oldRow.getCell(column.getCellIndex()); - - if (cell != null && cell.value != null) { - if (c == columnIndex) { - firstNewRow.setCell(columnIndex + r2, cell); - } else if (c < columnIndex) { - newRow.setCell(c, cell); - hasData = true; - } else { - newRow.setCell(c + _rowCount - 1, cell); - hasData = true; - } - } - } - - if (hasData) { - newRows.add(newRow); - } - } - } - - return new HistoryEntry( - historyEntryID, - project, - getBriefDescription(null), - this, - new MassRowColumnChange(newColumns, newRows) - ); - } -} diff --git a/main/src/com/google/gridworks/operations/column/ColumnAdditionByFetchingURLsOperation.java b/main/src/com/google/gridworks/operations/column/ColumnAdditionByFetchingURLsOperation.java deleted file mode 100644 index 8c895bdec..000000000 --- a/main/src/com/google/gridworks/operations/column/ColumnAdditionByFetchingURLsOperation.java +++ /dev/null @@ -1,291 +0,0 @@ -package com.google.gridworks.operations.column; - -import java.io.InputStream; -import java.io.Serializable; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.WrappedCell; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; 
-import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.CellAtRow; -import com.google.gridworks.model.changes.ColumnAdditionChange; -import com.google.gridworks.operations.EngineDependentOperation; -import com.google.gridworks.operations.OnError; -import com.google.gridworks.operations.OperationRegistry; -import com.google.gridworks.operations.cell.TextTransformOperation; -import com.google.gridworks.process.LongRunningProcess; -import com.google.gridworks.process.Process; -import com.google.gridworks.util.ParsingUtilities; - -public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation { - final protected String _baseColumnName; - final protected String _urlExpression; - final protected OnError _onError; - - final protected String _newColumnName; - final protected int _columnInsertIndex; - final protected int _delay; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - - return new ColumnAdditionByFetchingURLsOperation( - engineConfig, - obj.getString("baseColumnName"), - obj.getString("urlExpression"), - TextTransformOperation.stringToOnError(obj.getString("onError")), - obj.getString("newColumnName"), - obj.getInt("columnInsertIndex"), - obj.getInt("delay") - ); - } - - public ColumnAdditionByFetchingURLsOperation( - JSONObject engineConfig, - String baseColumnName, - String urlExpression, - OnError onError, - String newColumnName, - int columnInsertIndex, - int delay - ) { - super(engineConfig); - - _baseColumnName = baseColumnName; - _urlExpression = urlExpression; - _onError = onError; - - _newColumnName = newColumnName; - _columnInsertIndex = columnInsertIndex; - - _delay = delay; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); 
writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("newColumnName"); writer.value(_newColumnName); - writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); - writer.key("baseColumnName"); writer.value(_baseColumnName); - writer.key("urlExpression"); writer.value(_urlExpression); - writer.key("onError"); writer.value(TextTransformOperation.onErrorToString(_onError)); - writer.key("delay"); writer.value(_delay); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Create column " + _newColumnName + - " at index " + _columnInsertIndex + - " by fetching URLs based on column " + _baseColumnName + - " using expression " + _urlExpression; - } - - protected String createDescription(Column column, List cellsAtRows) { - return "Create new column " + _newColumnName + - ", filling " + cellsAtRows.size() + - " rows by fetching URLs based on column " + column.getName() + - " and formulated as " + _urlExpression; - } - - - public Process createProcess(Project project, Properties options) throws Exception { - Column column = project.columnModel.getColumnByName(_baseColumnName); - if (column == null) { - throw new Exception("No column named " + _baseColumnName); - } - if (project.columnModel.getColumnByName(_newColumnName) != null) { - throw new Exception("Another column already named " + _newColumnName); - } - - Engine engine = createEngine(project); - engine.initializeFromJSON(_engineConfig); - - Evaluable eval = MetaParser.parse(_urlExpression); - - return new ColumnAdditionByFetchingURLsProcess( - project, - engine, - eval, - getBriefDescription(null) - ); - } - - public class ColumnAdditionByFetchingURLsProcess extends LongRunningProcess implements Runnable { - final protected Project _project; - final protected Engine _engine; - final protected Evaluable 
_eval; - final protected long _historyEntryID; - protected int _cellIndex; - - public ColumnAdditionByFetchingURLsProcess( - Project project, - Engine engine, - Evaluable eval, - String description - ) throws JSONException { - super(description); - _project = project; - _engine = engine; - _eval = eval; - _historyEntryID = HistoryEntry.allocateID(); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("id"); writer.value(hashCode()); - writer.key("description"); writer.value(_description); - writer.key("immediate"); writer.value(false); - writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? "running" : "done")); - writer.key("progress"); writer.value(_progress); - writer.endObject(); - } - - protected Runnable getRunnable() { - return this; - } - - public void run() { - List urls = new ArrayList(_project.rows.size()); - - FilteredRows filteredRows = _engine.getAllFilteredRows(); - filteredRows.accept(_project, createRowVisitor(urls)); - - List responseBodies = new ArrayList(urls.size()); - for (int i = 0; i < urls.size(); i++) { - CellAtRow urlData = urls.get(i); - CellAtRow cellAtRow = fetch(urlData); - if (cellAtRow != null) { - responseBodies.add(cellAtRow); - } - - _progress = i * 100 / urls.size(); - try { - Thread.sleep(_delay); - } catch (InterruptedException e) { - if (_canceled) { - break; - } - } - } - - if (!_canceled) { - - HistoryEntry historyEntry = new HistoryEntry( - _historyEntryID, - _project, - _description, - ColumnAdditionByFetchingURLsOperation.this, - new ColumnAdditionChange( - _newColumnName, - _columnInsertIndex, - responseBodies) - ); - - _project.history.addEntry(historyEntry); - _project.processManager.onDoneProcess(this); - } - } - - CellAtRow fetch(CellAtRow urlData) { - String urlString = urlData.cell.value.toString(); - URL url = null; - - try { - url = new URL(urlString); - } catch (MalformedURLException e) { - return null; - } - 
- try { - InputStream is = url.openStream(); - try { - return new CellAtRow(urlData.row, new Cell(ParsingUtilities.inputStreamToString(is), null)); - } finally { - is.close(); - } - } catch (Exception e) { - return _onError == OnError.StoreError ? - new CellAtRow(urlData.row, new Cell(new EvalError(e.getMessage()), null)) : null; - } - } - - RowVisitor createRowVisitor(List cellsAtRows) { - return new RowVisitor() { - int cellIndex; - Properties bindings; - List cellsAtRows; - - public RowVisitor init(List cellsAtRows) { - Column column = _project.columnModel.getColumnByName(_baseColumnName); - - this.cellIndex = column.getCellIndex(); - this.bindings = ExpressionUtils.createBindings(_project); - this.cellsAtRows = cellsAtRows; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(cellIndex); - Cell newCell = null; - - ExpressionUtils.bind(bindings, row, rowIndex, _baseColumnName, cell); - - Object o = _eval.evaluate(bindings); - if (o != null) { - if (o instanceof Cell) { - newCell = (Cell) o; - } else if (o instanceof WrappedCell) { - newCell = ((WrappedCell) o).cell; - } else { - Serializable v = ExpressionUtils.wrapStorable(o); - if (ExpressionUtils.isNonBlankData(v)) { - newCell = new Cell(v.toString(), null); - } - } - } - - if (newCell != null) { - cellsAtRows.add(new CellAtRow(rowIndex, newCell)); - } - - return false; - } - }.init(cellsAtRows); - } - } -} diff --git a/main/src/com/google/gridworks/operations/column/ColumnAdditionOperation.java b/main/src/com/google/gridworks/operations/column/ColumnAdditionOperation.java deleted file mode 100644 index 5366e1c3e..000000000 --- a/main/src/com/google/gridworks/operations/column/ColumnAdditionOperation.java +++ /dev/null @@ -1,191 +0,0 @@ -package com.google.gridworks.operations.column; - -import 
java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.Evaluable; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.WrappedCell; -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.CellAtRow; -import com.google.gridworks.model.changes.ColumnAdditionChange; -import com.google.gridworks.operations.EngineDependentOperation; -import com.google.gridworks.operations.OnError; -import com.google.gridworks.operations.OperationRegistry; -import com.google.gridworks.operations.cell.TextTransformOperation; - -public class ColumnAdditionOperation extends EngineDependentOperation { - final protected String _baseColumnName; - final protected String _expression; - final protected OnError _onError; - - final protected String _newColumnName; - final protected int _columnInsertIndex; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - - return new ColumnAdditionOperation( - engineConfig, - obj.getString("baseColumnName"), - obj.getString("expression"), - TextTransformOperation.stringToOnError(obj.getString("onError")), - obj.getString("newColumnName"), - obj.getInt("columnInsertIndex") - ); - } - - public ColumnAdditionOperation( - JSONObject engineConfig, - String 
baseColumnName, - String expression, - OnError onError, - String newColumnName, - int columnInsertIndex - ) { - super(engineConfig); - - _baseColumnName = baseColumnName; - _expression = expression; - _onError = onError; - - _newColumnName = newColumnName; - _columnInsertIndex = columnInsertIndex; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("newColumnName"); writer.value(_newColumnName); - writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); - writer.key("baseColumnName"); writer.value(_baseColumnName); - writer.key("expression"); writer.value(_expression); - writer.key("onError"); writer.value(TextTransformOperation.onErrorToString(_onError)); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Create column " + _newColumnName + - " at index " + _columnInsertIndex + - " based on column " + _baseColumnName + - " using expression " + _expression; - } - - protected String createDescription(Column column, List cellsAtRows) { - return "Create new column " + _newColumnName + - " based on column " + column.getName() + - " by filling " + cellsAtRows.size() + - " rows with " + _expression; - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - Engine engine = createEngine(project); - - Column column = project.columnModel.getColumnByName(_baseColumnName); - if (column == null) { - throw new Exception("No column named " + _baseColumnName); - } - if (project.columnModel.getColumnByName(_newColumnName) != null) { - throw new Exception("Another column already named " + _newColumnName); - } - - List cellsAtRows = new ArrayList(project.rows.size()); - - FilteredRows 
filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, createRowVisitor(project, cellsAtRows)); - - String description = createDescription(column, cellsAtRows); - - Change change = new ColumnAdditionChange(_newColumnName, _columnInsertIndex, cellsAtRows); - - return new HistoryEntry( - historyEntryID, project, description, this, change); - } - - protected RowVisitor createRowVisitor(Project project, List cellsAtRows) throws Exception { - Column column = project.columnModel.getColumnByName(_baseColumnName); - - Evaluable eval = MetaParser.parse(_expression); - Properties bindings = ExpressionUtils.createBindings(project); - - return new RowVisitor() { - int cellIndex; - Properties bindings; - List cellsAtRows; - Evaluable eval; - - public RowVisitor init(int cellIndex, Properties bindings, List cellsAtRows, Evaluable eval) { - this.cellIndex = cellIndex; - this.bindings = bindings; - this.cellsAtRows = cellsAtRows; - this.eval = eval; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(cellIndex); - Cell newCell = null; - - ExpressionUtils.bind(bindings, row, rowIndex, _baseColumnName, cell); - - Object o = eval.evaluate(bindings); - if (o != null) { - if (o instanceof Cell) { - newCell = (Cell) o; - } else if (o instanceof WrappedCell) { - newCell = ((WrappedCell) o).cell; - } else { - Serializable v = ExpressionUtils.wrapStorable(o); - if (ExpressionUtils.isError(v)) { - if (_onError == OnError.SetToBlank) { - return false; - } else if (_onError == OnError.KeepOriginal) { - v = cell != null ? 
cell.value : null; - } - } - - if (v != null) { - newCell = new Cell(v, null); - } - } - } - - if (newCell != null) { - cellsAtRows.add(new CellAtRow(rowIndex, newCell)); - } - - return false; - } - }.init(column.getCellIndex(), bindings, cellsAtRows, eval); - } -} diff --git a/main/src/com/google/gridworks/operations/column/ColumnMoveOperation.java b/main/src/com/google/gridworks/operations/column/ColumnMoveOperation.java deleted file mode 100644 index ebf68b96c..000000000 --- a/main/src/com/google/gridworks/operations/column/ColumnMoveOperation.java +++ /dev/null @@ -1,60 +0,0 @@ -package com.google.gridworks.operations.column; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.changes.ColumnMoveChange; -import com.google.gridworks.operations.OperationRegistry; - -public class ColumnMoveOperation extends AbstractOperation { - final protected String _columnName; - final protected int _index; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - return new ColumnMoveOperation( - obj.getString("columnName"), - obj.getInt("index") - ); - } - - public ColumnMoveOperation( - String columnName, - int index - ) { - _columnName = columnName; - _index = index; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value("Move column " + _columnName + " to position " + _index); - writer.key("columnName"); writer.value(_columnName); - writer.key("index"); writer.value(_index); - writer.endObject(); - } - - - protected String 
getBriefDescription(Project project) { - return "Move column " + _columnName + " to position " + _index; - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - if (project.columnModel.getColumnByName(_columnName) == null) { - throw new Exception("No column named " + _columnName); - } - - Change change = new ColumnMoveChange(_columnName, _index); - - return new HistoryEntry(historyEntryID, project, getBriefDescription(null), ColumnMoveOperation.this, change); - } -} diff --git a/main/src/com/google/gridworks/operations/column/ColumnRemovalOperation.java b/main/src/com/google/gridworks/operations/column/ColumnRemovalOperation.java deleted file mode 100644 index bbb7e4672..000000000 --- a/main/src/com/google/gridworks/operations/column/ColumnRemovalOperation.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.google.gridworks.operations.column; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.changes.ColumnRemovalChange; -import com.google.gridworks.operations.OperationRegistry; - -public class ColumnRemovalOperation extends AbstractOperation { - final protected String _columnName; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - return new ColumnRemovalOperation( - obj.getString("columnName") - ); - } - - public ColumnRemovalOperation( - String columnName - ) { - _columnName = columnName; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); 
writer.value("Remove column " + _columnName); - writer.key("columnName"); writer.value(_columnName); - writer.endObject(); - } - - - protected String getBriefDescription(Project project) { - return "Remove column " + _columnName; - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - if (column == null) { - throw new Exception("No column named " + _columnName); - } - - String description = "Remove column " + column.getName(); - - Change change = new ColumnRemovalChange(project.columnModel.columns.indexOf(column)); - - return new HistoryEntry(historyEntryID, project, description, ColumnRemovalOperation.this, change); - } -} diff --git a/main/src/com/google/gridworks/operations/column/ColumnRenameOperation.java b/main/src/com/google/gridworks/operations/column/ColumnRenameOperation.java deleted file mode 100644 index 9edac19b0..000000000 --- a/main/src/com/google/gridworks/operations/column/ColumnRenameOperation.java +++ /dev/null @@ -1,63 +0,0 @@ -package com.google.gridworks.operations.column; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.changes.ColumnRenameChange; -import com.google.gridworks.operations.OperationRegistry; - -public class ColumnRenameOperation extends AbstractOperation { - final protected String _oldColumnName; - final protected String _newColumnName; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - return new ColumnRenameOperation( - obj.getString("oldColumnName"), - obj.getString("newColumnName") - ); - } - - public ColumnRenameOperation( - String oldColumnName, - String 
newColumnName - ) { - _oldColumnName = oldColumnName; - _newColumnName = newColumnName; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value("Rename column " + _oldColumnName + " to " + _newColumnName); - writer.key("oldColumnName"); writer.value(_oldColumnName); - writer.key("newColumnName"); writer.value(_newColumnName); - writer.endObject(); - } - - - protected String getBriefDescription(Project project) { - return "Rename column " + _oldColumnName + " to " + _newColumnName; - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - if (project.columnModel.getColumnByName(_oldColumnName) == null) { - throw new Exception("No column named " + _oldColumnName); - } - if (project.columnModel.getColumnByName(_newColumnName) != null) { - throw new Exception("Another column already named " + _newColumnName); - } - - Change change = new ColumnRenameChange(_oldColumnName, _newColumnName); - - return new HistoryEntry(historyEntryID, project, getBriefDescription(null), ColumnRenameOperation.this, change); - } -} diff --git a/main/src/com/google/gridworks/operations/column/ColumnReorderOperation.java b/main/src/com/google/gridworks/operations/column/ColumnReorderOperation.java deleted file mode 100644 index ae416b878..000000000 --- a/main/src/com/google/gridworks/operations/column/ColumnReorderOperation.java +++ /dev/null @@ -1,60 +0,0 @@ -package com.google.gridworks.operations.column; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import 
com.google.gridworks.model.changes.ColumnReorderChange; -import com.google.gridworks.operations.OperationRegistry; -import com.google.gridworks.util.JSONUtilities; - -public class ColumnReorderOperation extends AbstractOperation { - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - List columnNames = new ArrayList(); - - JSONUtilities.getStringList(obj, "columnNames", columnNames); - - return new ColumnReorderOperation(columnNames); - } - - final protected List _columnNames; - - public ColumnReorderOperation(List columnNames) { - _columnNames = columnNames; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("columnNames"); writer.array(); - for (String n : _columnNames) { - writer.value(n); - } - writer.endArray(); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Reorder columns"; - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - return new HistoryEntry( - historyEntryID, - project, - "Reorder columns", - this, - new ColumnReorderChange(_columnNames) - ); - } -} diff --git a/main/src/com/google/gridworks/operations/column/ColumnSplitOperation.java b/main/src/com/google/gridworks/operations/column/ColumnSplitOperation.java deleted file mode 100644 index f99a8f42c..000000000 --- a/main/src/com/google/gridworks/operations/column/ColumnSplitOperation.java +++ /dev/null @@ -1,289 +0,0 @@ -package com.google.gridworks.operations.column; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; -import java.util.regex.Pattern; - -import org.apache.commons.lang.StringUtils; -import org.json.JSONException; -import org.json.JSONObject; 
-import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.importers.ImporterUtilities; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.ColumnSplitChange; -import com.google.gridworks.operations.EngineDependentOperation; -import com.google.gridworks.operations.OperationRegistry; -import com.google.gridworks.util.JSONUtilities; - -public class ColumnSplitOperation extends EngineDependentOperation { - final protected String _columnName; - final protected boolean _guessCellType; - final protected boolean _removeOriginalColumn; - final protected String _mode; - - final protected String _separator; - final protected boolean _regex; - final protected int _maxColumns; - - final protected int[] _fieldLengths; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - String mode = obj.getString("mode"); - - if ("separator".equals(mode)) { - return new ColumnSplitOperation( - engineConfig, - obj.getString("columnName"), - obj.getBoolean("guessCellType"), - obj.getBoolean("removeOriginalColumn"), - obj.getString("separator"), - obj.getBoolean("regex"), - obj.getInt("maxColumns") - ); - } else { - return new ColumnSplitOperation( - engineConfig, - obj.getString("columnName"), - obj.getBoolean("guessCellType"), - obj.getBoolean("removeOriginalColumn"), - JSONUtilities.getIntArray(obj, "fieldLengths") - ); - } - } - - public ColumnSplitOperation( - JSONObject engineConfig, - String columnName, - boolean 
guessCellType, - boolean removeOriginalColumn, - String separator, - boolean regex, - int maxColumns - ) { - super(engineConfig); - - _columnName = columnName; - _guessCellType = guessCellType; - _removeOriginalColumn = removeOriginalColumn; - - _mode = "separator"; - _separator = separator; - _regex = regex; - _maxColumns = maxColumns; - - _fieldLengths = null; - } - - public ColumnSplitOperation( - JSONObject engineConfig, - String columnName, - boolean guessCellType, - boolean removeOriginalColumn, - int[] fieldLengths - ) { - super(engineConfig); - - _columnName = columnName; - _guessCellType = guessCellType; - _removeOriginalColumn = removeOriginalColumn; - - _mode = "lengths"; - _separator = null; - _regex = false; - _maxColumns = -1; - - _fieldLengths = fieldLengths; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("columnName"); writer.value(_columnName); - writer.key("guessCellType"); writer.value(_guessCellType); - writer.key("removeOriginalColumn"); writer.value(_removeOriginalColumn); - writer.key("mode"); writer.value(_mode); - if ("separator".equals(_mode)) { - writer.key("separator"); writer.value(_separator); - writer.key("regex"); writer.value(_regex); - writer.key("maxColumns"); writer.value(_maxColumns); - } else { - writer.key("fieldLengths"); writer.array(); - for (int l : _fieldLengths) { - writer.value(l); - } - writer.endArray(); - } - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Split column " + _columnName + - ("separator".equals(_mode) ? 
" by separator" : " by field lengths"); - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - Engine engine = createEngine(project); - - Column column = project.columnModel.getColumnByName(_columnName); - if (column == null) { - throw new Exception("No column named " + _columnName); - } - - List columnNames = new ArrayList(); - List rowIndices = new ArrayList(project.rows.size()); - List> tuples = new ArrayList>(project.rows.size()); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - RowVisitor rowVisitor; - if ("lengths".equals(_mode)) { - rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) { - protected java.util.List split(String s) { - List results = new ArrayList(_fieldLengths.length + 1); - - int lastIndex = 0; - for (int i = 0; i < _fieldLengths.length; i++) { - int from = lastIndex; - int length = _fieldLengths[i]; - int to = Math.min(from + length, s.length()); - - results.add(stringToValue(s.substring(from, to))); - - lastIndex = to; - } - - return results; - }; - }; - } else if (_regex) { - Pattern pattern = Pattern.compile(_separator); - - rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) { - Pattern _pattern; - - protected java.util.List split(String s) { - return stringArrayToValueList(_pattern.split(s, _maxColumns)); - }; - - public RowVisitor init(Pattern pattern) { - _pattern = pattern; - return this; - } - }.init(pattern); - } else { - rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) { - protected java.util.List split(String s) { - return stringArrayToValueList( - StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator, _maxColumns)); - }; - }; - } - - filteredRows.accept(project, rowVisitor); - - String description = - "Split " + rowIndices.size() + - " cell(s) in column " + _columnName + - " into several columns" + - 
("separator".equals(_mode) ? " by separator" : " by field lengths"); - - Change change = new ColumnSplitChange( - _columnName, - columnNames, - rowIndices, - tuples, - _removeOriginalColumn - ); - - return new HistoryEntry( - historyEntryID, project, description, this, change); - } - - protected class ColumnSplitRowVisitor implements RowVisitor { - - int cellIndex; - List columnNames; - List rowIndices; - List> tuples; - - int columnNameIndex = 1; - - ColumnSplitRowVisitor( - int cellIndex, - List columnNames, - List rowIndices, - List> tuples - ) { - this.cellIndex = cellIndex; - this.columnNames = columnNames; - this.rowIndices = rowIndices; - this.tuples = tuples; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Object value = row.getCellValue(cellIndex); - if (ExpressionUtils.isNonBlankData(value)) { - String s = value instanceof String ? ((String) value) : value.toString(); - - List tuple = split(s); - - rowIndices.add(rowIndex); - tuples.add(tuple); - - for (int i = columnNames.size(); i < tuple.size(); i++) { - while (true) { - String newColumnName = _columnName + " " + columnNameIndex++; - if (project.columnModel.getColumnByName(newColumnName) == null) { - columnNames.add(newColumnName); - break; - } - } - } - } - return false; - } - - protected List split(String s) { - throw new UnsupportedOperationException(); - } - - protected Serializable stringToValue(String s) { - return _guessCellType ? 
ImporterUtilities.parseCellValue(s) : s; - } - - protected List stringArrayToValueList(String[] cells) { - List results = new ArrayList(cells.length); - for (String cell : cells) { - results.add(stringToValue(cell)); - } - - return results; - } - } -} diff --git a/main/src/com/google/gridworks/operations/column/ExtendDataOperation.java b/main/src/com/google/gridworks/operations/column/ExtendDataOperation.java deleted file mode 100644 index 16f842681..000000000 --- a/main/src/com/google/gridworks/operations/column/ExtendDataOperation.java +++ /dev/null @@ -1,275 +0,0 @@ -package com.google.gridworks.operations.column; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; - -import org.apache.commons.lang.StringUtils; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.CellAtRow; -import com.google.gridworks.model.changes.DataExtensionChange; -import com.google.gridworks.operations.EngineDependentOperation; -import com.google.gridworks.operations.OperationRegistry; -import com.google.gridworks.process.LongRunningProcess; -import com.google.gridworks.process.Process; -import com.google.gridworks.protograph.FreebaseType; -import com.google.gridworks.util.FreebaseDataExtensionJob; -import com.google.gridworks.util.FreebaseDataExtensionJob.ColumnInfo; -import 
com.google.gridworks.util.FreebaseDataExtensionJob.DataExtension; - -public class ExtendDataOperation extends EngineDependentOperation { - final protected String _baseColumnName; - final protected JSONObject _extension; - final protected int _columnInsertIndex; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - - return new ExtendDataOperation( - engineConfig, - obj.getString("baseColumnName"), - obj.getJSONObject("extension"), - obj.getInt("columnInsertIndex") - ); - } - - public ExtendDataOperation( - JSONObject engineConfig, - String baseColumnName, - JSONObject extension, - int columnInsertIndex - ) { - super(engineConfig); - - _baseColumnName = baseColumnName; - _extension = extension; - _columnInsertIndex = columnInsertIndex; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); - writer.key("baseColumnName"); writer.value(_baseColumnName); - writer.key("extension"); writer.value(_extension); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Extend data at index " + _columnInsertIndex + - " based on column " + _baseColumnName; - } - - protected String createDescription(Column column, List cellsAtRows) { - return "Extend data at index " + _columnInsertIndex + - " based on column " + column.getName() + - " by filling " + cellsAtRows.size(); - } - - public Process createProcess(Project project, Properties options) throws Exception { - return new ExtendDataProcess( - project, - getEngineConfig(), - getBriefDescription(null) - ); - } - - public class 
ExtendDataProcess extends LongRunningProcess implements Runnable { - final protected Project _project; - final protected JSONObject _engineConfig; - final protected long _historyEntryID; - protected int _cellIndex; - protected FreebaseDataExtensionJob _job; - - public ExtendDataProcess( - Project project, - JSONObject engineConfig, - String description - ) throws JSONException { - super(description); - _project = project; - _engineConfig = engineConfig; - _historyEntryID = HistoryEntry.allocateID(); - - _job = new FreebaseDataExtensionJob(_extension); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("id"); writer.value(hashCode()); - writer.key("description"); writer.value(_description); - writer.key("immediate"); writer.value(false); - writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? "running" : "done")); - writer.key("progress"); writer.value(_progress); - writer.endObject(); - } - - protected Runnable getRunnable() { - return this; - } - - protected void populateRowsWithMatches(List rowIndices) throws Exception { - Engine engine = new Engine(_project); - engine.initializeFromJSON(_engineConfig); - - Column column = _project.columnModel.getColumnByName(_baseColumnName); - if (column == null) { - throw new Exception("No column named " + _baseColumnName); - } - - _cellIndex = column.getCellIndex(); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(_project, new RowVisitor() { - List _rowIndices; - - public RowVisitor init(List rowIndices) { - _rowIndices = rowIndices; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(_cellIndex); - if (cell != null && cell.recon != null && cell.recon.match != null) { - 
_rowIndices.add(rowIndex); - } - - return false; - } - }.init(rowIndices)); - } - - protected int extendRows( - List rowIndices, - List dataExtensions, - int from, - int limit, - Map reconCandidateMap - ) { - Set ids = new HashSet(); - - int end; - for (end = from; end < limit && ids.size() < 10; end++) { - int index = rowIndices.get(end); - Row row = _project.rows.get(index); - Cell cell = row.getCell(_cellIndex); - - ids.add(cell.recon.match.id); - } - - Map map = null; - try { - map = _job.extend(ids, reconCandidateMap); - } catch (Exception e) { - map = new HashMap(); - } - - for (int i = from; i < end; i++) { - int index = rowIndices.get(i); - Row row = _project.rows.get(index); - Cell cell = row.getCell(_cellIndex); - String guid = cell.recon.match.id; - - if (map.containsKey(guid)) { - dataExtensions.add(map.get(guid)); - } else { - dataExtensions.add(null); - } - } - - return end; - } - - public void run() { - List rowIndices = new ArrayList(); - List dataExtensions = new ArrayList(); - - try { - populateRowsWithMatches(rowIndices); - } catch (Exception e2) { - // TODO : Not sure what to do here? 
- e2.printStackTrace(); - } - - int start = 0; - Map reconCandidateMap = new HashMap(); - - while (start < rowIndices.size()) { - int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size(), reconCandidateMap); - start = end; - - _progress = end * 100 / rowIndices.size(); - try { - Thread.sleep(200); - } catch (InterruptedException e) { - if (_canceled) { - break; - } - } - } - - if (!_canceled) { - List columnNames = new ArrayList(); - for (ColumnInfo info : _job.columns) { - columnNames.add(StringUtils.join(info.names, " - ")); - } - - List columnTypes = new ArrayList(); - for (ColumnInfo info : _job.columns) { - columnTypes.add(info.expectedType); - } - - HistoryEntry historyEntry = new HistoryEntry( - _historyEntryID, - _project, - _description, - ExtendDataOperation.this, - new DataExtensionChange( - _baseColumnName, - _columnInsertIndex, - columnNames, - columnTypes, - rowIndices, - dataExtensions, - _historyEntryID) - ); - - _project.history.addEntry(historyEntry); - _project.processManager.onDoneProcess(this); - } - } - } -} diff --git a/main/src/com/google/gridworks/operations/recon/ImportQADataOperation.java b/main/src/com/google/gridworks/operations/recon/ImportQADataOperation.java deleted file mode 100644 index abe2f2985..000000000 --- a/main/src/com/google/gridworks/operations/recon/ImportQADataOperation.java +++ /dev/null @@ -1,106 +0,0 @@ -package com.google.gridworks.operations.recon; - -import java.io.InputStreamReader; -import java.io.LineNumberReader; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.commands.freebase.UploadDataCommand; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; 
-import com.google.gridworks.model.Recon; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.MassReconChange; -import com.google.gridworks.operations.OperationRegistry; -import com.google.gridworks.util.ParsingUtilities; - -public class ImportQADataOperation extends AbstractOperation { - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - return new ImportQADataOperation(); - } - - public ImportQADataOperation() { - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.endObject(); - } - - @Override - protected String getBriefDescription(Project project) { - return "Import QA DAta"; - } - - @Override - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - Integer jobID = (Integer) project.getMetadata().getPreferenceStore().get(UploadDataCommand.s_dataLoadJobIDPref); - if (jobID == null) { - throw new InternalError("Project is not associated with any data loading job."); - } - - Map reconIDToResult = new HashMap(); - - URL url = new URL("http://gridworks-loads.dfhuynh.user.dev.freebaseapps.com/get_answers/" + jobID); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setReadTimeout(30000); // 30 seconds - - LineNumberReader reader = new LineNumberReader(new InputStreamReader(conn.getInputStream())); - try { - String line; - while ((line = reader.readLine()) != null) { - JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(line); - long reconID = Long.parseLong(obj.getString("recon_id").substring(3)); - - reconIDToResult.put(reconID, obj.getString("result")); - } - } finally { - reader.close(); - } - - Map oldRecons = new HashMap(); - Map newRecons = new HashMap(); - - for (int r = 0; r < 
project.rows.size(); r++) { - Row row = project.rows.get(r); - - for (int c = 0; c < row.cells.size(); c++) { - Cell cell = row.cells.get(c); - if (cell != null && cell.recon != null) { - Recon oldRecon = cell.recon; - - if (reconIDToResult.containsKey(oldRecon.id)) { - Recon newRecon = oldRecon.dup(); - newRecon.setFeature(Recon.Feature_qaResult, reconIDToResult.get(oldRecon.id)); - - reconIDToResult.remove(oldRecon.id); - - oldRecons.put(oldRecon.id, oldRecon); - newRecons.put(oldRecon.id, newRecon); - } - } - } - } - - return new HistoryEntry( - historyEntryID, - project, - getBriefDescription(project), - this, - new MassReconChange(newRecons, oldRecons) - ); - } -} diff --git a/main/src/com/google/gridworks/operations/recon/ReconDiscardJudgmentsOperation.java b/main/src/com/google/gridworks/operations/recon/ReconDiscardJudgmentsOperation.java deleted file mode 100644 index 158a1cb97..000000000 --- a/main/src/com/google/gridworks/operations/recon/ReconDiscardJudgmentsOperation.java +++ /dev/null @@ -1,125 +0,0 @@ -package com.google.gridworks.operations.recon; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.history.Change; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.model.changes.ReconChange; -import com.google.gridworks.operations.EngineDependentMassCellOperation; -import com.google.gridworks.operations.OperationRegistry; - -public class ReconDiscardJudgmentsOperation extends 
EngineDependentMassCellOperation { - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - String columnName = obj.getString("columnName"); - - return new ReconDiscardJudgmentsOperation( - engineConfig, - columnName - ); - } - - public ReconDiscardJudgmentsOperation(JSONObject engineConfig, String columnName) { - super(engineConfig, columnName, false); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("columnName"); writer.value(_columnName); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Discard recon judgments for cells in column " + _columnName; - } - - protected String createDescription(Column column, - List cellChanges) { - - return "Discard recon judgments for " + cellChanges.size() + - " cells in column " + column.getName(); - } - - protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - - return new RowVisitor() { - int cellIndex; - List cellChanges; - Map dupReconMap = new HashMap(); - long historyEntryID; - - public RowVisitor init(int cellIndex, List cellChanges, long historyEntryID) { - this.cellIndex = cellIndex; - this.cellChanges = cellChanges; - this.historyEntryID = historyEntryID; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(cellIndex); - if (cell != null && 
cell.recon != null) { - Recon newRecon; - if (dupReconMap.containsKey(cell.recon.id)) { - newRecon = dupReconMap.get(cell.recon.id); - newRecon.judgmentBatchSize++; - } else { - newRecon = cell.recon.dup(historyEntryID); - newRecon.match = null; - newRecon.matchRank = -1; - newRecon.judgment = Judgment.None; - newRecon.judgmentAction = "mass"; - newRecon.judgmentBatchSize = 1; - - dupReconMap.put(cell.recon.id, newRecon); - } - - Cell newCell = new Cell(cell.value, newRecon); - - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); - cellChanges.add(cellChange); - } - return false; - } - }.init(column.getCellIndex(), cellChanges, historyEntryID); - } - - protected Change createChange(Project project, Column column, List cellChanges) { - return new ReconChange( - cellChanges, - _columnName, - column.getReconConfig(), - null - ); - } -} diff --git a/main/src/com/google/gridworks/operations/recon/ReconJudgeSimilarCellsOperation.java b/main/src/com/google/gridworks/operations/recon/ReconJudgeSimilarCellsOperation.java deleted file mode 100644 index 2f736eb18..000000000 --- a/main/src/com/google/gridworks/operations/recon/ReconJudgeSimilarCellsOperation.java +++ /dev/null @@ -1,248 +0,0 @@ -package com.google.gridworks.operations.recon; - - import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.history.Change; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.Row; -import 
com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.model.changes.ReconChange; -import com.google.gridworks.operations.EngineDependentMassCellOperation; -import com.google.gridworks.operations.OperationRegistry; - -public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOperation { - final protected String _similarValue; - final protected Judgment _judgment; - final protected ReconCandidate _match; - final protected boolean _shareNewTopics; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - - ReconCandidate match = null; - if (obj.has("match")) { - JSONObject matchObj = obj.getJSONObject("match"); - - JSONArray types = matchObj.getJSONArray("types"); - String[] typeIDs = new String[types.length()]; - for (int i = 0; i < typeIDs.length; i++) { - typeIDs[i] = types.getString(i); - } - - match = new ReconCandidate( - matchObj.getString("id"), - matchObj.getString("name"), - typeIDs, - matchObj.getDouble("score") - ); - } - - Judgment judgment = Judgment.None; - if (obj.has("judgment")) { - judgment = Recon.stringToJudgment(obj.getString("judgment")); - } - - return new ReconJudgeSimilarCellsOperation( - engineConfig, - obj.getString("columnName"), - obj.getString("similarValue"), - judgment, - match, - obj.has("shareNewTopics") ? 
obj.getBoolean("shareNewTopics") : false - ); - } - - public ReconJudgeSimilarCellsOperation( - JSONObject engineConfig, - String columnName, - String similarValue, - Judgment judgment, - ReconCandidate match, - boolean shareNewTopics - ) { - super(engineConfig, columnName, false); - this._similarValue = similarValue; - this._judgment = judgment; - this._match = match; - this._shareNewTopics = shareNewTopics; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("columnName"); writer.value(_columnName); - writer.key("similarValue"); writer.value(_similarValue); - writer.key("judgment"); writer.value(Recon.judgmentToString(_judgment)); - if (_match != null) { - writer.key("match"); _match.write(writer, options); - } - writer.key("shareNewTopics"); writer.value(_shareNewTopics); - - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - if (_judgment == Judgment.None) { - return "Discard recon judgments for cells containing \"" + - _similarValue + "\" in column " + _columnName; - } else if (_judgment == Judgment.New) { - if (_shareNewTopics) { - return "Mark to create one single new topic for all cells containing \"" + - _similarValue + "\" in column " + _columnName; - } else { - return "Mark to create one new topic for each cell containing \"" + - _similarValue + "\" in column " + _columnName; - } - } else if (_judgment == Judgment.Matched) { - return "Match topic " + - _match.name + " (" + - _match.id + ") for cells containing \"" + - _similarValue + "\" in column " + _columnName; - } - throw new InternalError("Can't get here"); - } - - protected String createDescription(Column column, - List cellChanges) { - - if (_judgment == Judgment.None) { - 
return "Discard recon judgments for " + cellChanges.size() + " cells containing \"" + - _similarValue + "\" in column " + _columnName; - } else if (_judgment == Judgment.New) { - if (_shareNewTopics) { - return "Mark to create one single new topic for " + cellChanges.size() + " cells containing \"" + - _similarValue + "\" in column " + _columnName; - } else { - return "Mark to create one new topic for each of " + cellChanges.size() + " cells containing \"" + - _similarValue + "\" in column " + _columnName; - } - } else if (_judgment == Judgment.Matched) { - return "Match topic " + - _match.name + " (" + - _match.id + ") for " + - cellChanges.size() + " cells containing \"" + - _similarValue + "\" in column " + _columnName; - } - throw new InternalError("Can't get here"); - } - - protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - - return new RowVisitor() { - int _cellIndex; - List _cellChanges; - Recon _sharedNewRecon = null; - Map _dupReconMap = new HashMap(); - long _historyEntryID; - - public RowVisitor init(int cellIndex, List cellChanges, long historyEntryID) { - _cellIndex = cellIndex; - _cellChanges = cellChanges; - _historyEntryID = historyEntryID; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(_cellIndex); - if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) { - String value = cell.value instanceof String ? 
- ((String) cell.value) : cell.value.toString(); - - if (_similarValue.equals(value)) { - Recon recon = null; - if (_judgment == Judgment.New && _shareNewTopics) { - if (_sharedNewRecon == null) { - _sharedNewRecon = new Recon(_historyEntryID, null, null); - _sharedNewRecon.judgment = Judgment.New; - _sharedNewRecon.judgmentBatchSize = 0; - _sharedNewRecon.judgmentAction = "similar"; - } - _sharedNewRecon.judgmentBatchSize++; - - recon = _sharedNewRecon; - } else { - if (_dupReconMap.containsKey(cell.recon.id)) { - recon = _dupReconMap.get(cell.recon.id); - recon.judgmentBatchSize++; - } else { - recon = cell.recon.dup(_historyEntryID); - recon.judgmentBatchSize = 1; - recon.matchRank = -1; - recon.judgmentAction = "similar"; - - if (_judgment == Judgment.Matched) { - recon.judgment = Recon.Judgment.Matched; - recon.match = _match; - - if (recon.candidates != null) { - for (int m = 0; m < recon.candidates.size(); m++) { - if (recon.candidates.get(m).id.equals(_match.id)) { - recon.matchRank = m; - break; - } - } - } - } else if (_judgment == Judgment.New) { - recon.judgment = Recon.Judgment.New; - recon.match = null; - } else if (_judgment == Judgment.None) { - recon.judgment = Recon.Judgment.None; - recon.match = null; - } - - _dupReconMap.put(cell.recon.id, recon); - } - } - - Cell newCell = new Cell(cell.value, recon); - - CellChange cellChange = new CellChange(rowIndex, _cellIndex, cell, newCell); - _cellChanges.add(cellChange); - } - } - return false; - } - }.init(column.getCellIndex(), cellChanges, historyEntryID); - } - - - protected Change createChange(Project project, Column column, List cellChanges) { - return new ReconChange( - cellChanges, - _columnName, - column.getReconConfig(), - null - ); - } -} diff --git a/main/src/com/google/gridworks/operations/recon/ReconMarkNewTopicsOperation.java b/main/src/com/google/gridworks/operations/recon/ReconMarkNewTopicsOperation.java deleted file mode 100644 index 869c8565a..000000000 --- 
a/main/src/com/google/gridworks/operations/recon/ReconMarkNewTopicsOperation.java +++ /dev/null @@ -1,143 +0,0 @@ -package com.google.gridworks.operations.recon; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.history.Change; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.model.changes.ReconChange; -import com.google.gridworks.operations.EngineDependentMassCellOperation; -import com.google.gridworks.operations.OperationRegistry; - -public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperation { - final protected boolean _shareNewTopics; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - - return new ReconMarkNewTopicsOperation( - engineConfig, - obj.getString("columnName"), - obj.has("shareNewTopics") ? 
obj.getBoolean("shareNewTopics") : false - ); - } - - public ReconMarkNewTopicsOperation(JSONObject engineConfig, String columnName, boolean shareNewTopics) { - super(engineConfig, columnName, false); - _shareNewTopics = shareNewTopics; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("columnName"); writer.value(_columnName); - writer.key("shareNewTopics"); writer.value(_shareNewTopics); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Mark to create new topics for cells in column " + _columnName + - (_shareNewTopics ? - ", one topic for each group of similar cells" : - ", one topic for each cell"); - } - - protected String createDescription(Column column, - List cellChanges) { - - return "Mark to create new topics for " + cellChanges.size() + - " cells in column " + column.getName() + - (_shareNewTopics ? 
- ", one topic for each group of similar cells" : - ", one topic for each cell"); - } - - protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - - return new RowVisitor() { - int cellIndex; - List cellChanges; - Map sharedRecons = new HashMap(); - long historyEntryID; - - public RowVisitor init(int cellIndex, List cellChanges, long historyEntryID) { - this.cellIndex = cellIndex; - this.cellChanges = cellChanges; - this.historyEntryID = historyEntryID; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(cellIndex); - if (cell != null) { - Recon recon = null; - if (_shareNewTopics) { - String s = cell.value == null ? "" : cell.value.toString(); - if (sharedRecons.containsKey(s)) { - recon = sharedRecons.get(s); - recon.judgmentBatchSize++; - } else { - recon = new Recon(historyEntryID, null, null); - recon.judgment = Judgment.New; - recon.judgmentBatchSize = 1; - recon.judgmentAction = "mass"; - - sharedRecons.put(s, recon); - } - } else { - recon = cell.recon == null ? 
new Recon(historyEntryID, null, null) : cell.recon.dup(historyEntryID); - recon.match = null; - recon.matchRank = -1; - recon.judgment = Judgment.New; - recon.judgmentBatchSize = 1; - recon.judgmentAction = "mass"; - } - - Cell newCell = new Cell(cell.value, recon); - - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); - cellChanges.add(cellChange); - } - return false; - } - }.init(column.getCellIndex(), cellChanges, historyEntryID); - } - - protected Change createChange(Project project, Column column, List cellChanges) { - return new ReconChange( - cellChanges, - _columnName, - column.getReconConfig(), - null - ); - } -} diff --git a/main/src/com/google/gridworks/operations/recon/ReconMatchBestCandidatesOperation.java b/main/src/com/google/gridworks/operations/recon/ReconMatchBestCandidatesOperation.java deleted file mode 100644 index aa305842a..000000000 --- a/main/src/com/google/gridworks/operations/recon/ReconMatchBestCandidatesOperation.java +++ /dev/null @@ -1,133 +0,0 @@ -package com.google.gridworks.operations.recon; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.history.Change; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.model.changes.ReconChange; -import com.google.gridworks.operations.EngineDependentMassCellOperation; -import com.google.gridworks.operations.OperationRegistry; - -public class 
ReconMatchBestCandidatesOperation extends EngineDependentMassCellOperation { - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - String columnName = obj.getString("columnName"); - - return new ReconMatchBestCandidatesOperation( - engineConfig, - columnName - ); - } - - public ReconMatchBestCandidatesOperation(JSONObject engineConfig, String columnName) { - super(engineConfig, columnName, false); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("columnName"); writer.value(_columnName); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Match each cell to its best recon candidate in column " + _columnName; - } - - protected String createDescription(Column column, - List cellChanges) { - - return "Match each of " + cellChanges.size() + - " cells to its best candidate in column " + column.getName(); - } - - protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - - return new RowVisitor() { - int cellIndex; - List cellChanges; - Map dupReconMap = new HashMap(); - long historyEntryID; - - public RowVisitor init(int cellIndex, List cellChanges, long historyEntryID) { - this.cellIndex = cellIndex; - this.cellChanges = cellChanges; - this.historyEntryID = historyEntryID; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - 
if (cellIndex < row.cells.size()) { - Cell cell = row.cells.get(cellIndex); - if (cell != null && cell.recon != null) { - ReconCandidate candidate = cell.recon.getBestCandidate(); - if (candidate != null) { - Recon newRecon; - if (dupReconMap.containsKey(cell.recon.id)) { - newRecon = dupReconMap.get(cell.recon.id); - newRecon.judgmentBatchSize++; - } else { - newRecon = cell.recon.dup(historyEntryID); - newRecon.judgmentBatchSize = 1; - newRecon.match = candidate; - newRecon.matchRank = 0; - newRecon.judgment = Judgment.Matched; - newRecon.judgmentAction = "mass"; - - dupReconMap.put(cell.recon.id, newRecon); - } - Cell newCell = new Cell( - cell.value, - newRecon - ); - - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); - cellChanges.add(cellChange); - } - } - } - return false; - } - }.init(column.getCellIndex(), cellChanges, historyEntryID); - } - - protected Change createChange(Project project, Column column, List cellChanges) { - return new ReconChange( - cellChanges, - _columnName, - column.getReconConfig(), - null - ); - } -} diff --git a/main/src/com/google/gridworks/operations/recon/ReconMatchSpecificTopicOperation.java b/main/src/com/google/gridworks/operations/recon/ReconMatchSpecificTopicOperation.java deleted file mode 100644 index d7b7b0232..000000000 --- a/main/src/com/google/gridworks/operations/recon/ReconMatchSpecificTopicOperation.java +++ /dev/null @@ -1,182 +0,0 @@ -package com.google.gridworks.operations.recon; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.history.Change; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import 
com.google.gridworks.model.Recon; -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.model.changes.CellChange; -import com.google.gridworks.model.changes.ReconChange; -import com.google.gridworks.operations.EngineDependentMassCellOperation; -import com.google.gridworks.operations.OperationRegistry; - -public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOperation { - final protected ReconCandidate match; - final protected String identifierSpace; - final protected String schemaSpace; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - - JSONObject match = obj.getJSONObject("match"); - - JSONArray types = obj.getJSONArray("types"); - String[] typeIDs = new String[types.length()]; - for (int i = 0; i < typeIDs.length; i++) { - typeIDs[i] = types.getString(i); - } - - return new ReconMatchSpecificTopicOperation( - engineConfig, - obj.getString("columnName"), - new ReconCandidate( - match.getString("id"), - match.getString("name"), - typeIDs, - 100 - ), - obj.getString("identifierSpace"), - obj.getString("schemaSpace") - ); - } - - public ReconMatchSpecificTopicOperation( - JSONObject engineConfig, - String columnName, - ReconCandidate match, - String identifierSpace, - String schemaSpace - ) { - super(engineConfig, columnName, false); - this.match = match; - this.identifierSpace = identifierSpace; - this.schemaSpace = schemaSpace; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("columnName"); writer.value(_columnName); - 
writer.key("match"); - writer.object(); - writer.key("id"); writer.value(match.id); - writer.key("name"); writer.value(match.name); - writer.key("types"); - writer.array(); - for (String typeID : match.types) { - writer.value(typeID); - } - writer.endArray(); - writer.endObject(); - writer.key("identifierSpace"); writer.value(identifierSpace); - writer.key("schemaSpace"); writer.value(schemaSpace); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Match specific topic " + - match.name + " (" + - match.id + ") to cells in column " + _columnName; - } - - protected String createDescription(Column column, - List cellChanges) { - return "Match specific topic " + - match.name + " (" + - match.id + ") to " + cellChanges.size() + - " cells in column " + column.getName(); - } - - protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { - Column column = project.columnModel.getColumnByName(_columnName); - - return new RowVisitor() { - int cellIndex; - List cellChanges; - Map dupReconMap = new HashMap(); - long historyEntryID; - - public RowVisitor init(int cellIndex, List cellChanges, long historyEntryID) { - this.cellIndex = cellIndex; - this.cellChanges = cellChanges; - this.historyEntryID = historyEntryID; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - Cell cell = row.getCell(cellIndex); - if (cell != null) { - long reconID = cell.recon != null ? cell.recon.id : 0; - - Recon newRecon; - if (dupReconMap.containsKey(reconID)) { - newRecon = dupReconMap.get(reconID); - newRecon.judgmentBatchSize++; - } else { - newRecon = cell.recon != null ? 
- cell.recon.dup(historyEntryID) : - new Recon( - historyEntryID, - identifierSpace, - schemaSpace); - - newRecon.match = match; - newRecon.matchRank = -1; - newRecon.judgment = Judgment.Matched; - newRecon.judgmentAction = "mass"; - newRecon.judgmentBatchSize = 1; - - dupReconMap.put(reconID, newRecon); - } - - Cell newCell = new Cell( - cell.value, - newRecon - ); - - CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); - cellChanges.add(cellChange); - } - return false; - } - }.init(column.getCellIndex(), cellChanges, historyEntryID); - } - - protected Change createChange(Project project, Column column, List cellChanges) { - return new ReconChange( - cellChanges, - _columnName, - column.getReconConfig(), - null - ); - } -} diff --git a/main/src/com/google/gridworks/operations/recon/ReconOperation.java b/main/src/com/google/gridworks/operations/recon/ReconOperation.java deleted file mode 100644 index 13efe8a71..000000000 --- a/main/src/com/google/gridworks/operations/recon/ReconOperation.java +++ /dev/null @@ -1,298 +0,0 @@ -package com.google.gridworks.operations.recon; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.CellChange; -import 
com.google.gridworks.model.changes.ReconChange; -import com.google.gridworks.model.recon.ReconConfig; -import com.google.gridworks.model.recon.ReconJob; -import com.google.gridworks.model.recon.StandardReconConfig; -import com.google.gridworks.operations.EngineDependentOperation; -import com.google.gridworks.operations.OperationRegistry; -import com.google.gridworks.process.LongRunningProcess; -import com.google.gridworks.process.Process; - -public class ReconOperation extends EngineDependentOperation { - final protected String _columnName; - final protected ReconConfig _reconConfig; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - - return new ReconOperation( - engineConfig, - obj.getString("columnName"), - ReconConfig.reconstruct(obj.getJSONObject("config")) - ); - } - - public ReconOperation( - JSONObject engineConfig, - String columnName, - ReconConfig reconConfig - ) { - super(engineConfig); - _columnName = columnName; - _reconConfig = reconConfig; - } - - public Process createProcess(Project project, Properties options) throws Exception { - return new ReconProcess( - project, - getEngineConfig(), - getBriefDescription(null) - ); - } - - protected String getBriefDescription(Project project) { - return _reconConfig.getBriefDescription(project, _columnName); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("columnName"); writer.value(_columnName); - writer.key("config"); _reconConfig.write(writer, options); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.endObject(); - } - - static protected class ReconEntry { - final public int rowIndex; - final public Cell cell; - - public ReconEntry(int 
rowIndex, Cell cell) { - this.rowIndex = rowIndex; - this.cell = cell; - } - } - static protected class JobGroup { - final public ReconJob job; - final public List entries = new ArrayList(); - - public JobGroup(ReconJob job) { - this.job = job; - } - } - - public class ReconProcess extends LongRunningProcess implements Runnable { - final protected Project _project; - final protected JSONObject _engineConfig; - final protected long _historyEntryID; - protected List _entries; - protected int _cellIndex; - - public ReconProcess( - Project project, - JSONObject engineConfig, - String description - ) { - super(description); - _project = project; - _engineConfig = engineConfig; - _historyEntryID = HistoryEntry.allocateID(); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("id"); writer.value(hashCode()); - writer.key("description"); writer.value(_description); - writer.key("immediate"); writer.value(false); - writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? 
"running" : "done")); - writer.key("progress"); writer.value(_progress); - writer.key("onDone"); - writer.array(); - writer.object(); - writer.key("action"); writer.value("createFacet"); - writer.key("facetType"); writer.value("list"); - writer.key("facetConfig"); - writer.object(); - writer.key("name"); writer.value(_columnName + ": judgment"); - writer.key("columnName"); writer.value(_columnName); - writer.key("expression"); writer.value("cell.recon.judgment"); - writer.key("omitError"); writer.value(true); - writer.endObject(); - writer.key("facetOptions"); - writer.object(); - writer.key("scroll"); writer.value(false); - writer.endObject(); - writer.endObject(); - - if (_reconConfig instanceof StandardReconConfig) { - writer.object(); - writer.key("action"); writer.value("createFacet"); - writer.key("facetType"); writer.value("range"); - writer.key("facetConfig"); - writer.object(); - writer.key("name"); writer.value(_columnName + ": best candidate's score"); - writer.key("columnName"); writer.value(_columnName); - writer.key("expression"); writer.value("cell.recon.best.score"); - writer.key("mode"); writer.value("range"); - writer.endObject(); - writer.endObject(); - } - writer.endArray(); - writer.endObject(); - } - - protected Runnable getRunnable() { - return this; - } - - protected void populateEntries() throws Exception { - Engine engine = new Engine(_project); - engine.initializeFromJSON(_engineConfig); - - Column column = _project.columnModel.getColumnByName(_columnName); - if (column == null) { - throw new Exception("No column named " + _columnName); - } - - _entries = new ArrayList(_project.rows.size()); - _cellIndex = column.getCellIndex(); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(_project, new RowVisitor() { - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, 
Row row) { - if (_cellIndex < row.cells.size()) { - Cell cell = row.cells.get(_cellIndex); - if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) { - _entries.add(new ReconEntry(rowIndex, cell)); - } - } - return false; - } - }); - } - - public void run() { - try { - populateEntries(); - } catch (Exception e2) { - // TODO : Not sure what to do here? - e2.printStackTrace(); - } - - Map jobKeyToGroup = new HashMap(); - - for (ReconEntry entry : _entries) { - ReconJob job = _reconConfig.createJob( - _project, - entry.rowIndex, - _project.rows.get(entry.rowIndex), - _columnName, - entry.cell - ); - - int key = job.getKey(); - JobGroup group = jobKeyToGroup.get(key); - if (group == null) { - group = new JobGroup(job); - jobKeyToGroup.put(key, group); - } - group.entries.add(entry); - } - - List cellChanges = new ArrayList(_entries.size()); - List groups = new ArrayList(jobKeyToGroup.values()); - - int batchSize = _reconConfig.getBatchSize(); - for (int i = 0; i < groups.size(); i += batchSize) { - int to = Math.min(i + batchSize, groups.size()); - - List jobs = new ArrayList(to - i); - for (int j = i; j < to; j++) { - jobs.add(groups.get(j).job); - } - - List recons = _reconConfig.batchRecon(jobs, _historyEntryID); - for (int j = i; j < to; j++) { - int index = j - i; - Recon recon = index < recons.size() ? 
recons.get(j - i) : null; - List entries = groups.get(j).entries; - - if (recon != null) { - recon.judgmentBatchSize = entries.size(); - } - - for (ReconEntry entry : entries) { - Cell oldCell = entry.cell; - Cell newCell = new Cell(oldCell.value, recon); - - CellChange cellChange = new CellChange( - entry.rowIndex, - _cellIndex, - oldCell, - newCell - ); - cellChanges.add(cellChange); - } - } - - _progress = i * 100 / groups.size(); - try { - Thread.sleep(50); - } catch (InterruptedException e) { - if (_canceled) { - break; - } - } - } - - if (!_canceled) { - Change reconChange = new ReconChange( - cellChanges, - _columnName, - _reconConfig, - null - ); - - HistoryEntry historyEntry = new HistoryEntry( - _historyEntryID, - _project, - _description, - ReconOperation.this, - reconChange - ); - - _project.history.addEntry(historyEntry); - _project.processManager.onDoneProcess(this); - } - } - } -} diff --git a/main/src/com/google/gridworks/operations/row/DenormalizeOperation.java b/main/src/com/google/gridworks/operations/row/DenormalizeOperation.java deleted file mode 100644 index 4d018069c..000000000 --- a/main/src/com/google/gridworks/operations/row/DenormalizeOperation.java +++ /dev/null @@ -1,82 +0,0 @@ -package com.google.gridworks.operations.row; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.RecordModel.CellDependency; -import com.google.gridworks.model.RecordModel.RowDependency; -import com.google.gridworks.model.changes.MassRowChange; -import com.google.gridworks.operations.OperationRegistry; - -public class DenormalizeOperation extends AbstractOperation { - static 
public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - return new DenormalizeOperation(); - } - - public DenormalizeOperation() { - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value("Denormalize"); - writer.endObject(); - } - - - protected String getBriefDescription(Project project) { - return "Denormalize"; - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - List newRows = new ArrayList(); - - List oldRows = project.rows; - for (int r = 0; r < oldRows.size(); r++) { - Row oldRow = oldRows.get(r); - Row newRow = null; - - RowDependency rd = project.recordModel.getRowDependency(r); - if (rd.cellDependencies != null) { - newRow = oldRow.dup(); - - for (int c = 0; c < rd.cellDependencies.length; c++) { - CellDependency cd = rd.cellDependencies[c]; - if (cd != null) { - int contextRowIndex = cd.rowIndex; - int contextCellIndex = cd.cellIndex; - - if (contextRowIndex >= 0 && contextRowIndex < oldRows.size()) { - Row contextRow = oldRows.get(contextRowIndex); - Cell contextCell = contextRow.getCell(contextCellIndex); - - newRow.setCell(contextCellIndex, contextCell); - } - } - } - } - - newRows.add(newRow != null ? 
newRow : oldRow); - } - - return new HistoryEntry( - historyEntryID, - project, - getBriefDescription(project), - DenormalizeOperation.this, - new MassRowChange(newRows) - ); - } -} diff --git a/main/src/com/google/gridworks/operations/row/RowFlagOperation.java b/main/src/com/google/gridworks/operations/row/RowFlagOperation.java deleted file mode 100644 index ad25bc663..000000000 --- a/main/src/com/google/gridworks/operations/row/RowFlagOperation.java +++ /dev/null @@ -1,103 +0,0 @@ -package com.google.gridworks.operations.row; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.MassChange; -import com.google.gridworks.model.changes.RowFlagChange; -import com.google.gridworks.operations.EngineDependentOperation; -import com.google.gridworks.operations.OperationRegistry; - -public class RowFlagOperation extends EngineDependentOperation { - final protected boolean _flagged; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - boolean flagged = obj.getBoolean("flagged"); - - return new RowFlagOperation( - engineConfig, - flagged - ); - } - - public RowFlagOperation(JSONObject engineConfig, boolean flagged) { - super(engineConfig); - _flagged = flagged; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); 
writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("flagged"); writer.value(_flagged); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return (_flagged ? "Flag rows" : "Unflag rows"); - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - Engine engine = createEngine(project); - - List changes = new ArrayList(project.rows.size()); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, createRowVisitor(project, changes)); - - return new HistoryEntry( - historyEntryID, - project, - (_flagged ? "Flag" : "Unflag") + " " + changes.size() + " rows", - this, - new MassChange(changes, false) - ); - } - - protected RowVisitor createRowVisitor(Project project, List changes) throws Exception { - return new RowVisitor() { - List changes; - - public RowVisitor init(List changes) { - this.changes = changes; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - if (row.flagged != _flagged) { - RowFlagChange change = new RowFlagChange(rowIndex, _flagged); - - changes.add(change); - } - return false; - } - }.init(changes); - } -} diff --git a/main/src/com/google/gridworks/operations/row/RowRemovalOperation.java b/main/src/com/google/gridworks/operations/row/RowRemovalOperation.java deleted file mode 100644 index 3be5f77b0..000000000 --- a/main/src/com/google/gridworks/operations/row/RowRemovalOperation.java +++ /dev/null @@ -1,92 +0,0 @@ -package com.google.gridworks.operations.row; - - import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import 
org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.RowRemovalChange; -import com.google.gridworks.operations.EngineDependentOperation; -import com.google.gridworks.operations.OperationRegistry; - -public class RowRemovalOperation extends EngineDependentOperation { - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - - return new RowRemovalOperation( - engineConfig - ); - } - - public RowRemovalOperation(JSONObject engineConfig) { - super(engineConfig); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Remove rows"; - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - Engine engine = createEngine(project); - - List rowIndices = new ArrayList(); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, createRowVisitor(project, rowIndices)); - - return new HistoryEntry( - historyEntryID, - project, - "Remove " + rowIndices.size() + " rows", - this, - new RowRemovalChange(rowIndices) - ); - } - - protected RowVisitor createRowVisitor(Project project, List rowIndices) throws Exception { - return new RowVisitor() { - List 
rowIndices; - - public RowVisitor init(List rowIndices) { - this.rowIndices = rowIndices; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - rowIndices.add(rowIndex); - - return false; - } - }.init(rowIndices); - } -} diff --git a/main/src/com/google/gridworks/operations/row/RowReorderOperation.java b/main/src/com/google/gridworks/operations/row/RowReorderOperation.java deleted file mode 100644 index 5f967661d..000000000 --- a/main/src/com/google/gridworks/operations/row/RowReorderOperation.java +++ /dev/null @@ -1,126 +0,0 @@ -package com.google.gridworks.operations.row; - - import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.RecordVisitor; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.browsing.Engine.Mode; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.RowReorderChange; -import com.google.gridworks.operations.OperationRegistry; -import com.google.gridworks.sorting.SortingRecordVisitor; -import com.google.gridworks.sorting.SortingRowVisitor; - -public class RowReorderOperation extends AbstractOperation { - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - String mode = obj.getString("mode"); - JSONObject sorting = obj.has("sorting") && !obj.isNull("sorting") ? 
- obj.getJSONObject("sorting") : null; - - return new RowReorderOperation(Engine.stringToMode(mode), sorting); - } - - final protected Mode _mode; - final protected JSONObject _sorting; - - public RowReorderOperation(Mode mode, JSONObject sorting) { - _mode = mode; - _sorting = sorting; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("mode"); writer.value(Engine.modeToString(_mode)); - writer.key("sorting"); writer.value(_sorting); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return "Reorder rows"; - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - Engine engine = new Engine(project); - engine.setMode(_mode); - - List rowIndices = new ArrayList(); - if (_mode == Mode.RowBased) { - RowVisitor visitor = new IndexingVisitor(rowIndices); - if (_sorting != null) { - SortingRowVisitor srv = new SortingRowVisitor(visitor); - - srv.initializeFromJSON(project, _sorting); - if (srv.hasCriteria()) { - visitor = srv; - } - } - - engine.getAllRows().accept(project, visitor); - } else { - RecordVisitor visitor = new IndexingVisitor(rowIndices); - if (_sorting != null) { - SortingRecordVisitor srv = new SortingRecordVisitor(visitor); - - srv.initializeFromJSON(project, _sorting); - if (srv.hasCriteria()) { - visitor = srv; - } - } - - engine.getAllRecords().accept(project, visitor); - } - - return new HistoryEntry( - historyEntryID, - project, - "Reorder rows", - this, - new RowReorderChange(rowIndices) - ); - } - - static protected class IndexingVisitor implements RowVisitor, RecordVisitor { - List _indices; - - IndexingVisitor(List indices) { - _indices = indices; - } - - @Override - public void start(Project project) { - } - - @Override - public 
void end(Project project) { - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - _indices.add(rowIndex); - return false; - } - - @Override - public boolean visit(Project project, Record record) { - for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { - _indices.add(r); - } - return false; - } - } -} diff --git a/main/src/com/google/gridworks/operations/row/RowStarOperation.java b/main/src/com/google/gridworks/operations/row/RowStarOperation.java deleted file mode 100644 index d8d3a7815..000000000 --- a/main/src/com/google/gridworks/operations/row/RowStarOperation.java +++ /dev/null @@ -1,103 +0,0 @@ -package com.google.gridworks.operations.row; - - import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.history.Change; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.AbstractOperation; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.changes.MassChange; -import com.google.gridworks.model.changes.RowStarChange; -import com.google.gridworks.operations.EngineDependentOperation; -import com.google.gridworks.operations.OperationRegistry; - -public class RowStarOperation extends EngineDependentOperation { - final protected boolean _starred; - - static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { - JSONObject engineConfig = obj.getJSONObject("engineConfig"); - boolean starred = obj.getBoolean("starred"); - - return new RowStarOperation( - engineConfig, - starred - ); - } - - public RowStarOperation(JSONObject engineConfig, boolean starred) { - super(engineConfig); - _starred = starred; - } 
- - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); - writer.key("description"); writer.value(getBriefDescription(null)); - writer.key("engineConfig"); writer.value(getEngineConfig()); - writer.key("starred"); writer.value(_starred); - writer.endObject(); - } - - protected String getBriefDescription(Project project) { - return (_starred ? "Star rows" : "Unstar rows"); - } - - protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { - Engine engine = createEngine(project); - - List changes = new ArrayList(project.rows.size()); - - FilteredRows filteredRows = engine.getAllFilteredRows(); - filteredRows.accept(project, createRowVisitor(project, changes)); - - return new HistoryEntry( - historyEntryID, - project, - (_starred ? "Star" : "Unstar") + " " + changes.size() + " rows", - this, - new MassChange(changes, false) - ); - } - - protected RowVisitor createRowVisitor(Project project, List changes) throws Exception { - return new RowVisitor() { - List changes; - - public RowVisitor init(List changes) { - this.changes = changes; - return this; - } - - @Override - public void start(Project project) { - // nothing to do - } - - @Override - public void end(Project project) { - // nothing to do - } - - public boolean visit(Project project, int rowIndex, Row row) { - if (row.starred != _starred) { - RowStarChange change = new RowStarChange(rowIndex, _starred); - - changes.add(change); - } - return false; - } - }.init(changes); - } -} diff --git a/main/src/com/google/gridworks/preference/PreferenceStore.java b/main/src/com/google/gridworks/preference/PreferenceStore.java deleted file mode 100644 index 9d3c41c09..000000000 --- a/main/src/com/google/gridworks/preference/PreferenceStore.java +++ /dev/null @@ -1,90 +0,0 @@ -package com.google.gridworks.preference; - -import 
java.lang.reflect.Method; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.Properties; -import java.util.Set; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.GridworksServlet; -import com.google.gridworks.Jsonizable; - -public class PreferenceStore implements Jsonizable { - protected Map _prefs = new HashMap(); - - public void put(String key, Object value) { - if (value == null) { - _prefs.remove(key); - } else { - _prefs.put(key, value); - } - } - - public Object get(String key) { - return _prefs.get(key); - } - - public Set getKeys() { - return _prefs.keySet(); - } - - @Override - public void write(JSONWriter writer, Properties options) throws JSONException { - writer.object(); - - writer.key("entries"); - writer.object(); - for (String k : _prefs.keySet()) { - writer.key(k); - - Object o = _prefs.get(k); - if (o instanceof Jsonizable) { - ((Jsonizable) o).write(writer, options); - } else { - writer.value(o); - } - } - writer.endObject(); - - writer.endObject(); - } - - @SuppressWarnings("unchecked") - public void load(JSONObject obj) throws JSONException { - if (obj.has("entries") && !obj.isNull("entries")) { - JSONObject entries = obj.getJSONObject("entries"); - - Iterator i = entries.keys(); - while (i.hasNext()) { - String key = i.next(); - if (!entries.isNull(key)) { - Object o = entries.get(key); - _prefs.put(key, loadObject(o)); - } - } - } - } - - static public Object loadObject(Object o) { - if (o instanceof JSONObject) { - try { - JSONObject obj2 = (JSONObject) o; - String className = obj2.getString("class"); - Class klass = GridworksServlet.getClass(className); - Method method = klass.getMethod("load", JSONObject.class); - - return method.invoke(null, obj2); - } catch (Exception e) { - e.printStackTrace(); - return null; - } - } else { - return o; - } - } -} diff --git a/main/src/com/google/gridworks/preference/TopList.java 
b/main/src/com/google/gridworks/preference/TopList.java deleted file mode 100644 index d358da176..000000000 --- a/main/src/com/google/gridworks/preference/TopList.java +++ /dev/null @@ -1,72 +0,0 @@ -package com.google.gridworks.preference; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.apache.commons.collections.list.UnmodifiableList; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; - - -public class TopList implements Jsonizable { - private static final long serialVersionUID = 2666669643063493350L; - - final protected int _top; - final protected List _list = new ArrayList(); - - public TopList(int top) { - _top = top; - } - - @SuppressWarnings("unchecked") - public List getList() { - return (List) UnmodifiableList.decorate(_list); - } - - public void add(String element) { - _list.remove(element); - _list.add(0, element); - while (_list.size() > _top) { - _list.remove(_list.size() - 1); - } - } - - @Override - public void write(JSONWriter writer, Properties options) throws JSONException { - writer.object(); - writer.key("class"); writer.value(this.getClass().getName()); - - writer.key("top"); writer.value(_top); - writer.key("list"); - writer.array(); - for (String element : _list) { - writer.value(element); - } - writer.endArray(); - writer.endObject(); - } - - static public TopList load(JSONObject obj) throws JSONException { - int top = obj.has("top") && !obj.isNull("top") ? 
obj.getInt("top") : 10; - TopList tl = new TopList(top); - - if (obj.has("list") && !obj.isNull("list")) { - JSONArray a = obj.getJSONArray("list"); - - tl.load(a); - } - return tl; - } - - public void load(JSONArray a) throws JSONException { - int length = a.length(); - for (int i = 0; i < length && _list.size() < _top; i++) { - _list.add(a.getString(i)); - } - } -} diff --git a/main/src/com/google/gridworks/process/LongRunningProcess.java b/main/src/com/google/gridworks/process/LongRunningProcess.java deleted file mode 100644 index f486845a2..000000000 --- a/main/src/com/google/gridworks/process/LongRunningProcess.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.google.gridworks.process; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.history.HistoryEntry; - -abstract public class LongRunningProcess extends Process { - final protected String _description; - protected ProcessManager _manager; - protected Thread _thread; - protected int _progress; // out of 100 - protected boolean _canceled; - - protected LongRunningProcess(String description) { - _description = description; - } - - public void cancel() { - _canceled = true; - if (_thread != null && _thread.isAlive()) { - _thread.interrupt(); - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("id"); writer.value(hashCode()); - writer.key("description"); writer.value(_description); - writer.key("immediate"); writer.value(false); - writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? 
"running" : "done")); - writer.key("progress"); writer.value(_progress); - writer.endObject(); - } - - @Override - public boolean isImmediate() { - return false; - } - - @Override - public boolean isRunning() { - return _thread != null && _thread.isAlive(); - } - - @Override - public boolean isDone() { - return _thread != null && !_thread.isAlive(); - } - - @Override - public HistoryEntry performImmediate() { - throw new RuntimeException("Not an immediate process"); - } - - @Override - public void startPerforming(ProcessManager manager) { - if (_thread == null) { - _manager = manager; - - _thread = new Thread(getRunnable()); - _thread.start(); - } - } - - abstract protected Runnable getRunnable(); -} diff --git a/main/src/com/google/gridworks/process/Process.java b/main/src/com/google/gridworks/process/Process.java deleted file mode 100644 index 89426461d..000000000 --- a/main/src/com/google/gridworks/process/Process.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.google.gridworks.process; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.history.HistoryEntry; - -public abstract class Process implements Jsonizable { - abstract public boolean isImmediate(); - - abstract public boolean isRunning(); - abstract public boolean isDone(); - - abstract public HistoryEntry performImmediate() throws Exception; - - abstract public void startPerforming(ProcessManager manager); - abstract public void cancel(); -} diff --git a/main/src/com/google/gridworks/process/ProcessManager.java b/main/src/com/google/gridworks/process/ProcessManager.java deleted file mode 100644 index ee47cfa63..000000000 --- a/main/src/com/google/gridworks/process/ProcessManager.java +++ /dev/null @@ -1,95 +0,0 @@ -package com.google.gridworks.process; - -import java.util.LinkedList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; -import 
com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.history.HistoryProcess; - -public class ProcessManager implements Jsonizable { - protected List _processes = new LinkedList(); - - public ProcessManager() { - - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("processes"); writer.array(); - for (Process p : _processes) { - p.write(writer, options); - } - writer.endArray(); - - writer.endObject(); - } - - public HistoryEntry queueProcess(Process process) throws Exception { - if (process.isImmediate() && _processes.size() == 0) { - return process.performImmediate(); - } else { - _processes.add(process); - - update(); - } - return null; - } - - public boolean queueProcess(HistoryProcess process) throws Exception { - if (process.isImmediate() && _processes.size() == 0) { - return process.performImmediate() != null; - } else { - _processes.add(process); - - update(); - } - return false; - } - - public boolean hasPending() { - return _processes.size() > 0; - } - - public void onDoneProcess(Process p) { - _processes.remove(p); - update(); - } - - public void cancelAll() { - for (Process p : _processes) { - if (!p.isImmediate() && p.isRunning()) { - p.cancel(); - } - } - _processes.clear(); - } - - protected void update() { - while (_processes.size() > 0) { - Process p = _processes.get(0); - if (p.isImmediate()) { - try { - p.performImmediate(); - } catch (Exception e) { - // TODO: Not sure what to do yet - e.printStackTrace(); - } - _processes.remove(0); - } else if (p.isDone()) { - _processes.remove(0); - } else { - if (!p.isRunning()) { - p.startPerforming(this); - } - break; - } - } - } -} diff --git a/main/src/com/google/gridworks/process/QuickHistoryEntryProcess.java b/main/src/com/google/gridworks/process/QuickHistoryEntryProcess.java deleted file mode 100644 index f4a0f203e..000000000 --- a/main/src/com/google/gridworks/process/QuickHistoryEntryProcess.java +++ /dev/null 
@@ -1,66 +0,0 @@ -package com.google.gridworks.process; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.Project; - -abstract public class QuickHistoryEntryProcess extends Process { - final protected Project _project; - final protected String _briefDescription; - protected HistoryEntry _historyEntry; - boolean _done = false; - - public QuickHistoryEntryProcess(Project project, String briefDescription) { - _project = project; - _briefDescription = briefDescription; - } - - public void cancel() { - throw new RuntimeException("Not a long-running process"); - } - - public boolean isImmediate() { - return true; - } - - public boolean isRunning() { - throw new RuntimeException("Not a long-running process"); - } - - public HistoryEntry performImmediate() throws Exception { - if (_historyEntry == null) { - _historyEntry = createHistoryEntry(HistoryEntry.allocateID()); - } - _project.history.addEntry(_historyEntry); - _done = true; - - return _historyEntry; - } - - public void startPerforming(ProcessManager manager) { - throw new RuntimeException("Not a long-running process"); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("id"); writer.value(hashCode()); - writer.key("description"); writer.value(_historyEntry != null ? _historyEntry.description : _briefDescription); - writer.key("immediate"); writer.value(true); - writer.key("status"); writer.value(_done ? 
"done" : "pending"); - writer.endObject(); - } - - - @Override - public boolean isDone() { - return _done; - } - - abstract protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception; -} diff --git a/main/src/com/google/gridworks/protograph/AnonymousNode.java b/main/src/com/google/gridworks/protograph/AnonymousNode.java deleted file mode 100644 index 551b0bc82..000000000 --- a/main/src/com/google/gridworks/protograph/AnonymousNode.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.LinkedList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -public class AnonymousNode implements Node, NodeWithLinks { - final public FreebaseType type; - final public List links = new LinkedList(); - - public AnonymousNode(FreebaseType type) { - this.type = type; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("nodeType"); writer.value("anonymous"); - writer.key("type"); type.write(writer, options); - if (links != null) { - writer.key("links"); writer.array(); - for (Link link : links) { - link.write(writer, options); - } - writer.endArray(); - } - writer.endObject(); - } - - public void addLink(Link link) { - links.add(link); - } - - public Link getLink(int index) { - return links.get(index); - } - - public int getLinkCount() { - return links.size(); - } -} diff --git a/main/src/com/google/gridworks/protograph/BooleanColumnCondition.java b/main/src/com/google/gridworks/protograph/BooleanColumnCondition.java deleted file mode 100644 index a53bfcf36..000000000 --- a/main/src/com/google/gridworks/protograph/BooleanColumnCondition.java +++ /dev/null @@ -1,42 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.model.Column; -import 
com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - - -public class BooleanColumnCondition implements Condition { - final public String columnName; - - public BooleanColumnCondition(String columnName) { - this.columnName = columnName; - } - - @Override - public boolean test(Project project, int rowIndex, Row row) { - Column column = project.columnModel.getColumnByName(columnName); - if (column != null) { - Object o = row.getCellValue(column.getCellIndex()); - if (o != null) { - if (o instanceof Boolean) { - return ((Boolean) o).booleanValue(); - } else { - return Boolean.parseBoolean(o.toString()); - } - } - } - return false; - } - - @Override - public void write(JSONWriter writer, Properties options) throws JSONException { - writer.object(); - writer.key("columnName"); writer.value(columnName); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/protograph/CellKeyNode.java b/main/src/com/google/gridworks/protograph/CellKeyNode.java deleted file mode 100644 index e6b5ddf05..000000000 --- a/main/src/com/google/gridworks/protograph/CellKeyNode.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -public class CellKeyNode extends CellNode { - final public FreebaseTopic namespace; - - public CellKeyNode( - FreebaseTopic namespace - ) { - this.namespace = namespace; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("nodeType"); writer.value("cell-as-key"); - - writer.key("columnNames"); - writer.array(); - for (String name : columnNames) { - writer.value(name); - } - writer.endArray(); - - writer.key("namespace"); namespace.write(writer, options); - writer.endObject(); - } -} diff --git a/main/src/com/google/gridworks/protograph/CellNode.java b/main/src/com/google/gridworks/protograph/CellNode.java deleted file mode 100644 index 
7284ada77..000000000 --- a/main/src/com/google/gridworks/protograph/CellNode.java +++ /dev/null @@ -1,8 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.LinkedList; -import java.util.List; - -abstract public class CellNode implements Node { - final public List columnNames = new LinkedList(); -} diff --git a/main/src/com/google/gridworks/protograph/CellTopicNode.java b/main/src/com/google/gridworks/protograph/CellTopicNode.java deleted file mode 100644 index e2b4cdc45..000000000 --- a/main/src/com/google/gridworks/protograph/CellTopicNode.java +++ /dev/null @@ -1,56 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.LinkedList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -public class CellTopicNode extends CellNode implements NodeWithLinks { - final public FreebaseType type; - final public List links = new LinkedList(); - - public CellTopicNode( - FreebaseType type - ) { - this.type = type; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("nodeType"); writer.value("cell-as-topic"); - writer.key("columnNames"); - writer.array(); - for (String name : columnNames) { - writer.value(name); - } - writer.endArray(); - if (type != null) { - writer.key("type"); type.write(writer, options); - } - if (links != null) { - writer.key("links"); writer.array(); - for (Link link : links) { - link.write(writer, options); - } - writer.endArray(); - } - - writer.endObject(); - } - - public void addLink(Link link) { - links.add(link); - } - - public Link getLink(int index) { - return links.get(index); - } - - public int getLinkCount() { - return links.size(); - } -} diff --git a/main/src/com/google/gridworks/protograph/CellValueNode.java b/main/src/com/google/gridworks/protograph/CellValueNode.java deleted file mode 100644 index c57fade38..000000000 --- 
a/main/src/com/google/gridworks/protograph/CellValueNode.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -public class CellValueNode extends CellNode { - final public String valueType; - final public String lang; - - public CellValueNode( - String valueType, - String lang - ) { - this.valueType = valueType; - this.lang = lang; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("nodeType"); writer.value("cell-as-value"); - writer.key("columnNames"); - writer.array(); - for (String name : columnNames) { - writer.value(name); - } - writer.endArray(); - writer.key("valueType"); writer.value(valueType); - writer.key("lang"); writer.value(lang); - writer.endObject(); - } - -} diff --git a/main/src/com/google/gridworks/protograph/Condition.java b/main/src/com/google/gridworks/protograph/Condition.java deleted file mode 100644 index ab45c0490..000000000 --- a/main/src/com/google/gridworks/protograph/Condition.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.google.gridworks.protograph; - -import com.google.gridworks.Jsonizable; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public interface Condition extends Jsonizable { - public boolean test(Project project, int rowIndex, Row row); -} diff --git a/main/src/com/google/gridworks/protograph/FreebaseProperty.java b/main/src/com/google/gridworks/protograph/FreebaseProperty.java deleted file mode 100644 index 30e24b1f7..000000000 --- a/main/src/com/google/gridworks/protograph/FreebaseProperty.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.google.gridworks.protograph; - -public class FreebaseProperty extends FreebaseTopic { - //final protected FreebaseType _expectedType; - - public FreebaseProperty(String id, String name) { - super(id, name); - } -} diff --git 
a/main/src/com/google/gridworks/protograph/FreebaseTopic.java b/main/src/com/google/gridworks/protograph/FreebaseTopic.java deleted file mode 100644 index 16686118f..000000000 --- a/main/src/com/google/gridworks/protograph/FreebaseTopic.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; - -public class FreebaseTopic implements Jsonizable { - final public String id; - final public String name; - - public FreebaseTopic(String id, String name) { - this.id = id; - this.name = name; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("id"); writer.value(id); - writer.key("name"); writer.value(name); - writer.endObject(); - } - -} diff --git a/main/src/com/google/gridworks/protograph/FreebaseTopicNode.java b/main/src/com/google/gridworks/protograph/FreebaseTopicNode.java deleted file mode 100644 index d42370a5d..000000000 --- a/main/src/com/google/gridworks/protograph/FreebaseTopicNode.java +++ /dev/null @@ -1,46 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.LinkedList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -public class FreebaseTopicNode implements Node, NodeWithLinks { - final public FreebaseTopic topic; - final public List links = new LinkedList(); - - public FreebaseTopicNode(FreebaseTopic topic) { - this.topic = topic; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("nodeType"); writer.value("topic"); - writer.key("topic"); topic.write(writer, options); - if (links != null) { - writer.key("links"); writer.array(); - for (Link link : links) { - link.write(writer, options); - } - writer.endArray(); - } - - writer.endObject(); - } - - public void addLink(Link 
link) { - links.add(link); - } - - public Link getLink(int index) { - return links.get(index); - } - - public int getLinkCount() { - return links.size(); - } -} diff --git a/main/src/com/google/gridworks/protograph/FreebaseType.java b/main/src/com/google/gridworks/protograph/FreebaseType.java deleted file mode 100644 index e8f99e404..000000000 --- a/main/src/com/google/gridworks/protograph/FreebaseType.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; - -public class FreebaseType extends FreebaseTopic implements Jsonizable { - public FreebaseType(String id, String name) { - super(id, name); - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("id"); writer.value(id); - writer.key("name"); writer.value(name); - writer.endObject(); - } - - static public FreebaseType load(JSONObject obj) throws Exception { - if (obj == null) { - return null; - } - - FreebaseType type = new FreebaseType( - obj.getString("id"), - obj.getString("name") - ); - return type; - } -} diff --git a/main/src/com/google/gridworks/protograph/Link.java b/main/src/com/google/gridworks/protograph/Link.java deleted file mode 100644 index 6ec3ea898..000000000 --- a/main/src/com/google/gridworks/protograph/Link.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.Jsonizable; - -public class Link implements Jsonizable { - final public FreebaseProperty property; - final public Node target; - final public Condition condition; - final public boolean load; - - public Link(FreebaseProperty property, Node target, Condition condition, boolean load) { - this.property = property; - this.target = target; 
- this.condition = condition; - this.load = load; - } - - public FreebaseProperty getProperty() { - return property; - } - - public Node getTarget() { - return target; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("property"); property.write(writer, options); - if (target != null) { - writer.key("target"); - target.write(writer, options); - } - if (condition != null) { - writer.key("condition"); - condition.write(writer, options); - } - writer.endObject(); - } - -} diff --git a/main/src/com/google/gridworks/protograph/Node.java b/main/src/com/google/gridworks/protograph/Node.java deleted file mode 100644 index 3c6d23154..000000000 --- a/main/src/com/google/gridworks/protograph/Node.java +++ /dev/null @@ -1,6 +0,0 @@ -package com.google.gridworks.protograph; - -import com.google.gridworks.Jsonizable; - -public interface Node extends Jsonizable { -} diff --git a/main/src/com/google/gridworks/protograph/NodeWithLinks.java b/main/src/com/google/gridworks/protograph/NodeWithLinks.java deleted file mode 100644 index 346794416..000000000 --- a/main/src/com/google/gridworks/protograph/NodeWithLinks.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.google.gridworks.protograph; - -public interface NodeWithLinks { - public void addLink(Link link); - - public int getLinkCount(); - - public Link getLink(int index); -} diff --git a/main/src/com/google/gridworks/protograph/Protograph.java b/main/src/com/google/gridworks/protograph/Protograph.java deleted file mode 100644 index eb8de5540..000000000 --- a/main/src/com/google/gridworks/protograph/Protograph.java +++ /dev/null @@ -1,168 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.LinkedList; -import java.util.List; -import java.util.Properties; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.model.OverlayModel; -import 
com.google.gridworks.model.Project; - -public class Protograph implements OverlayModel { - final protected List _rootNodes = new LinkedList(); - - public int getRootNodeCount() { - return _rootNodes.size(); - } - - public Node getRootNode(int index) { - return _rootNodes.get(index); - } - - @Override - public void onBeforeSave() { - } - - @Override - public void onAfterSave() { - } - - - @Override - public void dispose() { - } - - static public Protograph reconstruct(JSONObject o) throws JSONException { - Protograph g = new Protograph(); - - JSONArray rootNodes = o.getJSONArray("rootNodes"); - int count = rootNodes.length(); - - for (int i = 0; i < count; i++) { - JSONObject o2 = rootNodes.getJSONObject(i); - Node node = reconstructNode(o2); - if (node != null) { - g._rootNodes.add(node); - } - } - - return g; - } - - static protected Node reconstructNode(JSONObject o) throws JSONException { - Node node = null; - - String nodeType = o.getString("nodeType"); - if (nodeType.startsWith("cell-as-")) { - if ("cell-as-topic".equals(nodeType)) { - if (o.has("type")) { - node = new CellTopicNode( - reconstructType(o.getJSONObject("type")) - ); - } - } else if ("cell-as-value".equals(nodeType)) { - node = new CellValueNode( - o.getString("valueType"), - o.getString("lang") - ); - } else if ("cell-as-key".equals(nodeType)) { - node = new CellKeyNode( - reconstructTopic(o.getJSONObject("namespace")) - ); - } - - if (o.has("columnName") && !o.isNull("columnName")) { - ((CellNode) node).columnNames.add(o.getString("columnName")); - } - if (o.has("columnNames") && !o.isNull("columnNames")) { - JSONArray columnNames = o.getJSONArray("columnNames"); - int count = columnNames.length(); - - for (int c = 0; c < count; c++) { - ((CellNode) node).columnNames.add(columnNames.getString(c)); - } - } - } else if ("topic".equals(nodeType)) { - node = new FreebaseTopicNode(reconstructTopic(o.getJSONObject("topic"))); - } else if ("value".equals(nodeType)) { - node = new ValueNode( - 
o.get("value"), - o.getString("valueType"), - o.getString("lang") - ); - } else if ("anonymous".equals(nodeType)) { - node = new AnonymousNode(reconstructType(o.getJSONObject("type"))); - } - - if (node != null && node instanceof NodeWithLinks && o.has("links")) { - NodeWithLinks node2 = (NodeWithLinks) node; - - JSONArray links = o.getJSONArray("links"); - int linkCount = links.length(); - - for (int j = 0; j < linkCount; j++) { - JSONObject oLink = links.getJSONObject(j); - Condition condition = null; - - if (oLink.has("condition") && !oLink.isNull("condition")) { - JSONObject oCondition = oLink.getJSONObject("condition"); - if (oCondition.has("columnName") && !oCondition.isNull("columnName")) { - condition = new BooleanColumnCondition(oCondition.getString("columnName")); - } - } - - node2.addLink(new Link( - reconstructProperty(oLink.getJSONObject("property")), - oLink.has("target") && !oLink.isNull("target") ? - reconstructNode(oLink.getJSONObject("target")) : null, - condition, - oLink.has("load") && !oLink.isNull("load") ? 
- oLink.getBoolean("load") : true - )); - } - } - - return node; - } - - static protected FreebaseProperty reconstructProperty(JSONObject o) throws JSONException { - return new FreebaseProperty( - o.getString("id"), - o.getString("name") - ); - } - - static protected FreebaseType reconstructType(JSONObject o) throws JSONException { - return new FreebaseType( - o.getString("id"), - o.getString("name") - ); - } - - static protected FreebaseTopic reconstructTopic(JSONObject o) throws JSONException { - return new FreebaseTopic( - o.getString("id"), - o.getString("name") - ); - } - - public void write(JSONWriter writer, Properties options) throws JSONException { - writer.object(); - writer.key("rootNodes"); writer.array(); - - for (Node node : _rootNodes) { - node.write(writer, options); - } - - writer.endArray(); - writer.endObject(); - } - - static public Protograph load(Project project, JSONObject obj) throws Exception { - return reconstruct(obj); - } -} diff --git a/main/src/com/google/gridworks/protograph/ValueNode.java b/main/src/com/google/gridworks/protograph/ValueNode.java deleted file mode 100644 index 3ca4384dc..000000000 --- a/main/src/com/google/gridworks/protograph/ValueNode.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.google.gridworks.protograph; - -import java.util.Properties; - -import org.json.JSONException; -import org.json.JSONWriter; - -public class ValueNode implements Node { - final public Object value; - final public String valueType; - final public String lang; - - public ValueNode(Object value, String valueType, String lang) { - this.value = value; - this.valueType = valueType; - this.lang = lang; - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - writer.key("nodeType"); writer.value("value"); - writer.key("value"); writer.value(value); - writer.key("valueType"); writer.value(valueType); - writer.key("lang"); writer.value(lang); - writer.endObject(); - } -} diff --git 
a/main/src/com/google/gridworks/protograph/transpose/MqlwriteLikeTransposedNodeFactory.java b/main/src/com/google/gridworks/protograph/transpose/MqlwriteLikeTransposedNodeFactory.java deleted file mode 100644 index 3aa3c5fad..000000000 --- a/main/src/com/google/gridworks/protograph/transpose/MqlwriteLikeTransposedNodeFactory.java +++ /dev/null @@ -1,346 +0,0 @@ -package com.google.gridworks.protograph.transpose; - -import java.io.IOException; -import java.io.Writer; -import java.util.LinkedList; -import java.util.List; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Recon; -import com.google.gridworks.protograph.AnonymousNode; -import com.google.gridworks.protograph.CellKeyNode; -import com.google.gridworks.protograph.CellNode; -import com.google.gridworks.protograph.CellTopicNode; -import com.google.gridworks.protograph.CellValueNode; -import com.google.gridworks.protograph.FreebaseProperty; -import com.google.gridworks.protograph.FreebaseTopicNode; -import com.google.gridworks.protograph.Link; -import com.google.gridworks.protograph.ValueNode; -import com.google.gridworks.util.JSONUtilities; - -public class MqlwriteLikeTransposedNodeFactory implements TransposedNodeFactory { - protected Writer writer; - protected List rootObjects = new LinkedList(); - - private static final String TYPE = "type"; - private static final String ID = "id"; - private static final String NAME = "name"; - private static final String CREATE = "create"; - private static final String VALUE = "value"; - private static final String CONNECT = "connect"; - private static final String LANG = "lang"; - - public MqlwriteLikeTransposedNodeFactory(Writer writer) { - this.writer = writer; - } - - protected JSONArray getJSON() { - return new JSONArray(rootObjects); - } - - @Override - public void flush() throws IOException { - try { - JSONWriter 
jsonWriter = new JSONWriter(writer); - - jsonWriter.array(); - for (JSONObject obj : rootObjects) { - jsonWriter.value(obj); - } - jsonWriter.endArray(); - - } catch (JSONException e) { - e.printStackTrace(); - } - writer.flush(); - } - - abstract protected class JsonTransposedNode implements TransposedNode { - abstract public Object getJSON(); - } - - abstract protected class JsonObjectTransposedNode extends JsonTransposedNode { - abstract public JSONObject getJSONObject(); - - protected JSONObject obj; - - public Object getJSON() { - return getJSONObject(); - } - } - - protected class AnonymousTransposedNode extends JsonObjectTransposedNode { - JsonObjectTransposedNode parent; - FreebaseProperty property; - AnonymousNode node; - - protected AnonymousTransposedNode( - JsonObjectTransposedNode parent, - FreebaseProperty property, - AnonymousNode node - ) { - this.parent = parent; - this.property = property; - this.node = node; - } - - public JSONObject getJSONObject() { - if (obj == null) { - obj = new JSONObject(); - try { - obj.put(TYPE, this.node.type.id); - obj.put(ID, (String) null); - obj.put(CREATE, "unconditional"); - } catch (JSONException e) { - e.printStackTrace(); - } - - linkTransposedNodeJSON(obj, parent, property); - } - - return obj; - } - } - - protected class CellTopicTransposedNode extends JsonObjectTransposedNode { - protected CellTopicNode node; - protected Cell cell; - - public CellTopicTransposedNode(CellTopicNode node, Cell cell) { - this.node = node; - this.cell = cell; - } - - @Override - public JSONObject getJSONObject() { - if (obj == null) { - obj = new JSONObject(); - try { - if (cell.recon != null && - cell.recon.judgment == Recon.Judgment.Matched && - cell.recon.match != null) { - obj.put(ID, cell.recon.match.id); - } else { - obj.put(ID, (String) null); - obj.put(NAME, cell.value.toString()); - obj.put(TYPE, node.type.id); - obj.put(CREATE, "unless_exists"); - } - } catch (JSONException e) { - e.printStackTrace(); - } - } - return 
obj; - } - } - - protected class CellValueTransposedNode extends JsonTransposedNode { - protected JSONObject obj; - protected CellValueNode node; - protected Cell cell; - - public CellValueTransposedNode(CellValueNode node, Cell cell) { - this.node = node; - this.cell = cell; - } - - public Object getJSON() { - if (obj == null) { - obj = new JSONObject(); - try { - JSONUtilities.putField(obj, VALUE, cell.value); - - obj.put(TYPE, node.valueType); - if ("/type/text".equals(node.valueType)) { - obj.put(LANG, node.lang); - } - - obj.put(CONNECT, "insert"); - } catch (JSONException e) { - e.printStackTrace(); - } - } - return obj; - } - } - - protected class CellKeyTransposedNode extends JsonTransposedNode { - protected JSONObject obj; - protected CellKeyNode node; - protected Cell cell; - - public CellKeyTransposedNode(CellKeyNode node, Cell cell) { - this.node = node; - this.cell = cell; - } - - public Object getJSON() { - if (obj == null) { - obj = new JSONObject(); - try { - obj.put(VALUE, cell.value.toString()); - - JSONObject nsObj = new JSONObject(); - nsObj.put(ID, node.namespace.id); - - obj.put("namespace", nsObj); - obj.put(CONNECT, "insert"); - } catch (JSONException e) { - e.printStackTrace(); - } - } - return obj; - } - } - - protected class TopicTransposedNode extends JsonObjectTransposedNode { - protected FreebaseTopicNode node; - - public TopicTransposedNode(FreebaseTopicNode node) { - this.node = node; - } - - @Override - public JSONObject getJSONObject() { - if (obj == null) { - obj = new JSONObject(); - try { - obj.put(ID, node.topic.id); - } catch (JSONException e) { - e.printStackTrace(); - } - } - return obj; - } - } - - protected class ValueTransposedNode extends JsonTransposedNode { - protected JSONObject obj; - protected ValueNode node; - - public ValueTransposedNode(ValueNode node) { - this.node = node; - } - - public Object getJSON() { - if (obj == null) { - obj = new JSONObject(); - try { - obj.put(VALUE, node.value); - obj.put(TYPE, 
node.valueType); - if ("/type/text".equals(node.valueType)) { - obj.put(LANG, node.lang); - } - - obj.put(CONNECT, "insert"); - } catch (JSONException e) { - e.printStackTrace(); - } - } - return obj; - } - } - public TransposedNode transposeAnonymousNode( - TransposedNode parentNode, - Link link, - AnonymousNode node, int rowIndex) { - - return new AnonymousTransposedNode( - parentNode instanceof JsonObjectTransposedNode ? (JsonObjectTransposedNode) parentNode : null, - link != null ? link.property : null, - node - ); - } - - public TransposedNode transposeCellNode( - TransposedNode parentNode, - Link link, - CellNode node, - int rowIndex, - int cellIndex, - Cell cell) { - - JsonTransposedNode tnode = null; - if (node instanceof CellTopicNode) { - tnode = new CellTopicTransposedNode((CellTopicNode) node, cell); - } else if (node instanceof CellValueNode) { - tnode = new CellValueTransposedNode((CellValueNode) node, cell); - } else if (node instanceof CellKeyNode) { - tnode = new CellKeyTransposedNode((CellKeyNode) node, cell); - } - - if (tnode != null) { - processTransposedNode(tnode, parentNode, link != null ? link.property : null); - } - return tnode; - } - - public TransposedNode transposeTopicNode( - TransposedNode parentNode, - Link link, - FreebaseTopicNode node, int rowIndex) { - - JsonTransposedNode tnode = new TopicTransposedNode(node); - - processTransposedNode(tnode, parentNode, link != null ? link.property : null); - - return tnode; - } - - public TransposedNode transposeValueNode( - TransposedNode parentNode, - Link link, - ValueNode node, int rowIndex) { - - JsonTransposedNode tnode = new ValueTransposedNode(node); - - processTransposedNode(tnode, parentNode, link != null ? 
link.property : null); - - return tnode; - } - - protected void processTransposedNode( - JsonTransposedNode tnode, - TransposedNode parentNode, - FreebaseProperty property - ) { - - if (!(tnode instanceof AnonymousTransposedNode)) { - linkTransposedNodeJSON(tnode.getJSON(), parentNode, property); - } - } - - protected void linkTransposedNodeJSON( - Object obj, - TransposedNode parentNode, - FreebaseProperty property - ) { - - if (parentNode == null) { - if (obj instanceof JSONObject) { - rootObjects.add((JSONObject) obj); - } - } else if (parentNode instanceof JsonTransposedNode) { - JSONObject parentObj = ((JsonObjectTransposedNode) parentNode).getJSONObject(); - - try { - JSONArray a = null; - if (parentObj.has(property.id)) { - a = parentObj.getJSONArray(property.id); - } else { - a = new JSONArray(); - parentObj.put(property.id, a); - } - - a.put(a.length(), obj); - } catch (JSONException e) { - e.printStackTrace(); - } - } - } -} diff --git a/main/src/com/google/gridworks/protograph/transpose/TransposedNode.java b/main/src/com/google/gridworks/protograph/transpose/TransposedNode.java deleted file mode 100644 index f14492b7a..000000000 --- a/main/src/com/google/gridworks/protograph/transpose/TransposedNode.java +++ /dev/null @@ -1,4 +0,0 @@ -package com.google.gridworks.protograph.transpose; - -public interface TransposedNode { -} diff --git a/main/src/com/google/gridworks/protograph/transpose/TransposedNodeFactory.java b/main/src/com/google/gridworks/protograph/transpose/TransposedNodeFactory.java deleted file mode 100644 index 2f9b37d80..000000000 --- a/main/src/com/google/gridworks/protograph/transpose/TransposedNodeFactory.java +++ /dev/null @@ -1,43 +0,0 @@ -package com.google.gridworks.protograph.transpose; - -import java.io.IOException; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.protograph.AnonymousNode; -import com.google.gridworks.protograph.CellNode; -import com.google.gridworks.protograph.FreebaseTopicNode; -import 
com.google.gridworks.protograph.Link; -import com.google.gridworks.protograph.ValueNode; - -public interface TransposedNodeFactory { - public TransposedNode transposeAnonymousNode( - TransposedNode parentNode, - Link link, - AnonymousNode node, int rowIndex - ); - - public TransposedNode transposeCellNode( - TransposedNode parentNode, - Link link, - CellNode node, - int rowIndex, - int cellIndex, - Cell cell - ); - - public TransposedNode transposeValueNode( - TransposedNode parentNode, - Link link, - ValueNode node, - int rowIndex - ); - - public TransposedNode transposeTopicNode( - TransposedNode parentNode, - Link link, - FreebaseTopicNode node, - int rowIndex - ); - - public void flush() throws IOException; -} diff --git a/main/src/com/google/gridworks/protograph/transpose/Transposer.java b/main/src/com/google/gridworks/protograph/transpose/Transposer.java deleted file mode 100644 index 12d38b058..000000000 --- a/main/src/com/google/gridworks/protograph/transpose/Transposer.java +++ /dev/null @@ -1,222 +0,0 @@ -package com.google.gridworks.protograph.transpose; - -import java.util.LinkedList; -import java.util.List; - -import com.google.gridworks.browsing.FilteredRows; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.protograph.AnonymousNode; -import com.google.gridworks.protograph.CellNode; -import com.google.gridworks.protograph.CellTopicNode; -import com.google.gridworks.protograph.FreebaseTopicNode; -import com.google.gridworks.protograph.Link; -import com.google.gridworks.protograph.Node; -import com.google.gridworks.protograph.NodeWithLinks; -import com.google.gridworks.protograph.Protograph; -import com.google.gridworks.protograph.ValueNode; - -public class 
Transposer { - static public void transpose( - Project project, - FilteredRows filteredRows, - Protograph protograph, - Node rootNode, - TransposedNodeFactory nodeFactory - ) { - transpose(project, filteredRows, protograph, rootNode, nodeFactory, 20); - } - - static public void transpose( - Project project, - FilteredRows filteredRows, - Protograph protograph, - Node rootNode, - TransposedNodeFactory nodeFactory, - int limit - ) { - Context rootContext = new Context(rootNode, null, null, limit); - - filteredRows.accept(project, new RowVisitor() { - Context rootContext; - Protograph protograph; - Node rootNode; - TransposedNodeFactory nodeFactory; - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - if (rootContext.limit <= 0 || rootContext.count < rootContext.limit) { - descend(project, protograph, nodeFactory, rowIndex, row, rootNode, rootContext); - } - - if (rootContext.limit > 0 && rootContext.count > rootContext.limit) { - return true; - } - return false; - } - - @Override - public void start(Project project) { - // TODO Auto-generated method stub - - } - - @Override - public void end(Project project) { - // TODO Auto-generated method stub - - } - - public RowVisitor init( - Context rootContext, - Protograph protograph, - Node rootNode, - TransposedNodeFactory nodeFactory - ) { - this.rootContext = rootContext; - this.protograph = protograph; - this.rootNode = rootNode; - this.nodeFactory = nodeFactory; - - return this; - } - }.init(rootContext, protograph, rootNode, nodeFactory)); - } - - static protected void descend( - Project project, - Protograph protograph, - TransposedNodeFactory nodeFactory, - int rowIndex, - Row row, - Node node, - Context context - ) { - List tnodes = new LinkedList(); - - TransposedNode parentNode = context.parent == null ? null : context.parent.transposedNode; - Link link = context.parent == null ? 
null : context.link; - - if (node instanceof CellNode) { - CellNode node2 = (CellNode) node; - for (String columnName : node2.columnNames) { - Column column = project.columnModel.getColumnByName(columnName); - if (column != null) { - int cellIndex = column.getCellIndex(); - - Cell cell = row.getCell(cellIndex); - if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) { - if (node2 instanceof CellTopicNode && - (cell.recon == null || cell.recon.judgment == Judgment.None)) { - return; - } - - context.count++; - if (context.limit > 0 && context.count > context.limit) { - return; - } - - tnodes.add(nodeFactory.transposeCellNode( - parentNode, - link, - node2, - rowIndex, - cellIndex, - cell - )); - } - } - } - } else { - if (node instanceof AnonymousNode) { - tnodes.add(nodeFactory.transposeAnonymousNode( - parentNode, - link, - (AnonymousNode) node, - rowIndex - )); - } else if (node instanceof FreebaseTopicNode) { - tnodes.add(nodeFactory.transposeTopicNode( - parentNode, - link, - (FreebaseTopicNode) node, - rowIndex - )); - } else if (node instanceof ValueNode) { - tnodes.add(nodeFactory.transposeValueNode( - parentNode, - link, - (ValueNode) node, - rowIndex - )); - } - } - - if (node instanceof NodeWithLinks) { - NodeWithLinks node2 = (NodeWithLinks) node; - int linkCount = node2.getLinkCount(); - - for (int i = 0; i < linkCount; i++) { - Link link2 = node2.getLink(i); - if (link2.condition == null || link2.condition.test(project, rowIndex, row)) { - for (TransposedNode tnode : tnodes) { - context.transposedNode = tnode; - context.nullifySubContextNodes(); - - descend( - project, - protograph, - nodeFactory, - rowIndex, - row, - link2.getTarget(), - context.subContexts.get(i) - ); - } - } - } - } - } - - static class Context { - TransposedNode transposedNode; - List subContexts; - Context parent; - Link link; - int count; - int limit; - - Context(Node node, Context parent, Link link, int limit) { - this.parent = parent; - this.link = link; - this.limit = 
limit; - - if (node instanceof NodeWithLinks) { - NodeWithLinks node2 = (NodeWithLinks) node; - - int subContextCount = node2.getLinkCount(); - - subContexts = new LinkedList(); - for (int i = 0; i < subContextCount; i++) { - Link link2 = node2.getLink(i); - subContexts.add( - new Context(link2.getTarget(), this, link2, -1)); - } - } - } - - public void nullifySubContextNodes() { - if (subContexts != null) { - for (Context context : subContexts) { - context.transposedNode = null; - context.nullifySubContextNodes(); - } - } - } - } -} diff --git a/main/src/com/google/gridworks/protograph/transpose/TripleLoaderTransposedNodeFactory.java b/main/src/com/google/gridworks/protograph/transpose/TripleLoaderTransposedNodeFactory.java deleted file mode 100644 index 86441b5ae..000000000 --- a/main/src/com/google/gridworks/protograph/transpose/TripleLoaderTransposedNodeFactory.java +++ /dev/null @@ -1,725 +0,0 @@ -package com.google.gridworks.protograph.transpose; - -import java.io.IOException; -import java.io.Writer; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.Recon.Judgment; -import com.google.gridworks.model.recon.ReconConfig; -import com.google.gridworks.model.recon.StandardReconConfig; -import com.google.gridworks.protograph.AnonymousNode; -import com.google.gridworks.protograph.CellKeyNode; -import com.google.gridworks.protograph.CellNode; -import com.google.gridworks.protograph.CellTopicNode; -import com.google.gridworks.protograph.CellValueNode; -import com.google.gridworks.protograph.FreebaseProperty; -import com.google.gridworks.protograph.FreebaseTopic; -import 
com.google.gridworks.protograph.FreebaseTopicNode; -import com.google.gridworks.protograph.Link; -import com.google.gridworks.protograph.ValueNode; - -public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory { - protected Project project; - - protected boolean start = true; - protected Writer writer; - protected WritingTransposedNode lastRootNode; - protected Map varPool = new HashMap(); - protected Map newTopicVars = new HashMap(); - protected Set serializedRecons = new HashSet(); - - protected long contextID = 0; - protected int contextRowIndex; - protected int contextRefCount = 0; - protected JSONObject contextTreeRoot; - - public TripleLoaderTransposedNodeFactory(Project project, Writer writer) { - this.project = project; - this.writer = writer; - } - - @Override - public void flush() throws IOException { - if (lastRootNode != null) { - lastRootNode.write(null, null, project, -1, -1, null); - lastRootNode = null; - - writeContextTreeNode(); - } - } - - protected void writeLine(String line) { - try { - if (start) { - start = false; - } else { - writer.write('\n'); - } - writer.write(line); - } catch (IOException e) { - // ignore - } - } - - protected void writeRecon( - StringBuffer sb, - Project project, - int rowIndex, - int cellIndex, - Cell cell - ) { - Recon recon = cell.recon; - - sb.append("\"rec"); sb.append(Long.toString(recon.id)); sb.append("\""); - contextRefCount++; - - if (!serializedRecons.contains(recon.id)) { - serializedRecons.add(recon.id); - - Column column = project.columnModel.getColumnByCellIndex(cellIndex); - - // qa:sample_group - { - StringBuffer sb2 = new StringBuffer(); - - sb2.append("{ \"s\" : \"rec"); - sb2.append(Long.toString(recon.id)); - sb2.append("\", \"p\" : \"qa:sample_group\", \"o\" : "); - sb2.append(JSONObject.quote(column.getName())); - sb2.append(", \"ignore\" : true }"); - - writeLine(sb2.toString()); - } - - // qa:recon_data - { - StringBuffer sb2 = new StringBuffer(); - - String s = cell.value 
instanceof String ? (String) cell.value : cell.value.toString(); - - sb2.append("{ \"s\" : \"rec"); - sb2.append(Long.toString(recon.id)); - sb2.append("\", \"p\" : \"qa:recon_data\", \"ignore\" : true, \"o\" : { "); - - sb2.append(" \"history_entry\" : "); sb2.append(Long.toString(recon.judgmentHistoryEntry)); - sb2.append(", \"text\" : "); sb2.append(JSONObject.quote(s)); - sb2.append(", \"column\" : "); sb2.append(JSONObject.quote(column.getName())); - sb2.append(", \"service\" : "); sb2.append(JSONObject.quote(recon.service)); - sb2.append(", \"action\" : "); sb2.append(JSONObject.quote(recon.judgmentAction)); - sb2.append(", \"batch\" : "); sb2.append(Integer.toString(recon.judgmentBatchSize)); - - if (recon.judgment == Judgment.Matched) { - sb2.append(", \"matchRank\" : "); sb2.append(Integer.toString(recon.matchRank)); - sb2.append(", \"id\" : "); sb2.append(JSONObject.quote(recon.match.id)); - } - - ReconConfig reconConfig = column.getReconConfig(); - if (reconConfig != null && reconConfig instanceof StandardReconConfig) { - StandardReconConfig standardReconConfig = (StandardReconConfig) reconConfig; - sb2.append(", \"type\" : "); sb2.append(JSONObject.quote(standardReconConfig.typeID)); - } - - sb2.append(" } }"); - - writeLine(sb2.toString()); - } - } - } - - protected void writeLine( - String subject, String predicate, Object object, - Project project, - int subjectRowIndex, int subjectCellIndex, Cell subjectCell, - int objectRowIndex, int objectCellIndex, Cell objectCell, - boolean ignore - ) { - if (subject != null && object != null) { - String s = object instanceof String ? 
- JSONObject.quote((String) object) : object.toString(); - - StringBuffer sb = new StringBuffer(); - sb.append("{ \"s\" : \""); sb.append(subject); sb.append('"'); - sb.append(", \"p\" : \""); sb.append(predicate); sb.append('"'); - sb.append(", \"o\" : "); sb.append(s); - if (subjectCell != null || objectCell != null) { - sb.append(", \"meta\" : { "); - - sb.append("\"recon\" : { "); - if (subjectCell != null) { - sb.append("\"s\" : "); - writeRecon(sb, project, subjectRowIndex, subjectCellIndex, subjectCell); - } - if (objectCell != null) { - if (subjectCell != null) { - sb.append(", "); - } - sb.append("\"o\" : "); - writeRecon(sb, project, objectRowIndex, objectCellIndex, objectCell); - } - sb.append(" }"); - - sb.append(" }"); - } - if (ignore) { - sb.append(", \"ignore\" : true"); - } - sb.append(" }"); - - writeLine(sb.toString()); - } - } - - protected void writeLine( - String subject, String predicate, Object object, String lang, - Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell, - boolean ignore - ) { - if (subject != null && object != null) { - String s = object instanceof String ? 
- JSONObject.quote((String) object) : object.toString(); - - StringBuffer sb = new StringBuffer(); - sb.append("{ \"s\" : \""); sb.append(subject); sb.append('"'); - sb.append(", \"p\" : \""); sb.append(predicate); sb.append('"'); - sb.append(", \"o\" : "); sb.append(s); - sb.append(", \"lang\" : "); sb.append(lang); - - if (subjectCell != null) { - sb.append(", \"meta\" : { "); - sb.append("\"recon\" : { "); - sb.append("\"s\" : "); - writeRecon(sb, project, subjectRowIndex, subjectCellIndex, subjectCell); - sb.append(" }"); - sb.append(" }"); - } - if (ignore) { - sb.append(", \"ignore\" : true"); - } - sb.append(" }"); - - writeLine(sb.toString()); - } - } - - abstract protected class WritingTransposedNode implements TransposedNode { - JSONObject jsonContextNode; - boolean load; - - public Object write( - String subject, String predicate, Project project, - int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { - - return internalWrite( - subject, predicate, project, - subjectRowIndex, subjectCellIndex, subjectCell); - } - - abstract public Object internalWrite( - String subject, String predicate, Project project, - int subjectRowIndex, int subjectCellIndex, Cell subjectCell); - } - - abstract protected class TransposedNodeWithChildren extends WritingTransposedNode { - public List links = new LinkedList(); - public List rowIndices = new LinkedList(); - public List children = new LinkedList(); - - protected void writeChildren( - String subject, Project project, - int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { - - for (int i = 0; i < children.size(); i++) { - WritingTransposedNode child = children.get(i); - Link link = links.get(i); - String predicate = link.property.id; - - child.write(subject, predicate, project, - subjectRowIndex, subjectCellIndex, subjectCell); - } - } - } - - protected class AnonymousTransposedNode extends TransposedNodeWithChildren { - - //protected AnonymousTransposedNode(AnonymousNode node) { } - - public Object 
internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { - if (children.size() == 0 || subject == null) { - return null; - } - - StringBuffer sb = new StringBuffer(); - sb.append("{ \"s\" : \""); sb.append(subject); sb.append('"'); - sb.append(", \"p\" : \""); sb.append(predicate); sb.append('"'); - sb.append(", \"o\" : { "); - - StringBuffer sbRecon = new StringBuffer(); - - boolean first = true; - boolean firstRecon = true; - - if (subjectCell.recon != null) { - sbRecon.append("\"s\" : "); - writeRecon(sbRecon, project, subjectRowIndex, subjectCellIndex, subjectCell); - - firstRecon = false; - } - - for (int i = 0; i < children.size(); i++) { - WritingTransposedNode child = children.get(i); - Link link = links.get(i); - - FreebaseProperty property = link.property; - - Object c = child.internalWrite(null, null, project, subjectRowIndex, subjectCellIndex, null); - if (c != null) { - if (first) { - first = false; - } else { - sb.append(", "); - } - sb.append("\"" + property.id + "\": "); - sb.append(c instanceof String ? 
JSONObject.quote((String) c) : c.toString()); - } - - if (child instanceof CellTopicTransposedNode) { - CellTopicTransposedNode child2 = (CellTopicTransposedNode) child; - Recon recon = child2.cell.recon; - - if (recon != null && - (recon.judgment == Judgment.Matched || recon.judgment == Judgment.New)) { - - if (firstRecon) { - firstRecon = false; - } else { - sbRecon.append(", "); - } - - sbRecon.append("\""); sbRecon.append(property.id); sbRecon.append("\" : "); - - writeRecon(sbRecon, project, - rowIndices.get(i), child2.cellIndex, child2.cell); - } - } - } - sb.append(" }, \"meta\" : { \"recon\" : { "); - sb.append(sbRecon.toString()); - sb.append(" } } }"); - - writeLine(sb.toString()); - - return null; - } - } - - protected class CellTopicTransposedNode extends TransposedNodeWithChildren { - protected CellTopicNode node; - protected int rowIndex; - protected int cellIndex; - protected Cell cell; - - public CellTopicTransposedNode(CellTopicNode node, int rowIndex, int cellIndex, Cell cell) { - this.node = node; - this.rowIndex = rowIndex; - this.cellIndex = cellIndex; - this.cell = cell; - } - - public Object internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { - String id = null; - if (cell.recon != null && cell.recon.judgment != Recon.Judgment.None) { - int objectRowIndex = rowIndex; - int objectCellIndex = cellIndex; - Cell objectCell = cell; - - if (cell.recon.judgment == Recon.Judgment.Matched) { - id = cell.recon.match.id; - - } else if (cell.recon.judgment == Judgment.New) { - if (newTopicVars.containsKey(cell.recon.id)) { - id = newTopicVars.get(cell.recon.id); - } else { - Column column = project.columnModel.getColumnByCellIndex(cellIndex); - String columnName = column.getName(); - - long var = 0; - if (varPool.containsKey(columnName)) { - var = varPool.get(columnName); - } - varPool.put(columnName, var + 1); - - id = "$" + columnName.replaceAll("\\W+", "_") + "_" + var; - - 
String typeID = node.type.id; - - ReconConfig reconConfig = column.getReconConfig(); - if (reconConfig instanceof StandardReconConfig) { - typeID = ((StandardReconConfig) reconConfig).typeID; - } - - writeLine(id, "type", typeID, project, rowIndex, cellIndex, cell, -1, -1, (Cell) null, !load); - writeLine(id, "name", cell.value, project, -1, -1, (Cell) null, -1, -1, (Cell) null, !load); - - if (cell.recon != null) { - newTopicVars.put(cell.recon.id, id); - } - } - } else { - return null; - } - - if (subject != null) { - writeLine(subject, predicate, id, project, - subjectRowIndex, subjectCellIndex, subjectCell, - objectRowIndex, objectCellIndex, objectCell, !load); - } - - writeChildren(id, project, objectRowIndex, objectCellIndex, objectCell); - } - - return id; - } - } - - protected class CellValueTransposedNode extends WritingTransposedNode { - protected JSONObject obj; - protected CellValueNode node; - protected int rowIndex; - protected int cellIndex; - protected Cell cell; - - public CellValueTransposedNode(CellValueNode node, int rowIndex, int cellIndex, Cell cell) { - this.node = node; - this.rowIndex = rowIndex; - this.cellIndex = cellIndex; - this.cell = cell; - } - - public Object internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { - if (subject != null) { - if ("/type/text".equals(node.lang)) { - writeLine(subject, predicate, cell.value, node.lang, project, - subjectRowIndex, subjectCellIndex, subjectCell, !load); - } else { - writeLine(subject, predicate, cell.value, project, - subjectRowIndex, subjectCellIndex, subjectCell, - -1, -1, null, !load); - } - } - - return cell.value; - } - } - - protected class CellKeyTransposedNode extends WritingTransposedNode { - protected CellKeyNode node; - protected int rowIndex; - protected int cellIndex; - protected Cell cell; - - public CellKeyTransposedNode(CellKeyNode node, int rowIndex, int cellIndex, Cell cell) { - this.node = node; - 
this.rowIndex = rowIndex; - this.cellIndex = cellIndex; - this.cell = cell; - } - - public Object internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { - writeLine(subject, "key", node.namespace.id + "/" + cell.value, project, - subjectRowIndex, subjectCellIndex, subjectCell, - -1, -1, null, !load); - - return null; - } - } - - protected class TopicTransposedNode extends TransposedNodeWithChildren { - protected FreebaseTopicNode node; - - public TopicTransposedNode(FreebaseTopicNode node) { - this.node = node; - } - - public Object internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { - writeLine(subject, predicate, node.topic.id, project, - subjectRowIndex, subjectCellIndex, subjectCell, - -1, -1, null, !load); - - writeChildren(node.topic.id, project, -1, -1, null); - - return node.topic.id; - } - } - - protected class ValueTransposedNode extends WritingTransposedNode { - protected ValueNode node; - - public ValueTransposedNode(ValueNode node) { - this.node = node; - } - - public Object internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { - if ("/type/text".equals(node.lang)) { - writeLine(subject, predicate, node.value, node.lang, project, - subjectRowIndex, subjectCellIndex, subjectCell, !load); - } else { - writeLine(subject, predicate, node.value, project, - subjectRowIndex, subjectCellIndex, subjectCell, - -1, -1, null, !load); - } - - return node.value; - } - } - - public TransposedNode transposeAnonymousNode( - TransposedNode parentNode, - Link link, - AnonymousNode node, int rowIndex) { - - WritingTransposedNode parentNode2 = (WritingTransposedNode) parentNode; - WritingTransposedNode tnode = new AnonymousTransposedNode(); - - tnode.load = - (parentNode2 == null || parentNode2.load) && - (link == null || link.load); - - 
processTransposedNode(tnode, parentNode, link, rowIndex); - - tnode.jsonContextNode = addJsonContext( - parentNode2 != null ? parentNode2.jsonContextNode : null, - link != null ? link.property.id : null, - null - ); - - return tnode; - } - - public TransposedNode transposeCellNode( - TransposedNode parentNode, - Link link, - CellNode node, - int rowIndex, - int cellIndex, - Cell cell) { - - WritingTransposedNode parentNode2 = (WritingTransposedNode) parentNode; - - WritingTransposedNode tnode = null; - if (node instanceof CellTopicNode) { - if (cell.recon != null && - (cell.recon.judgment == Judgment.Matched || - cell.recon.judgment == Judgment.New)) { - - tnode = new CellTopicTransposedNode( - (CellTopicNode) node, rowIndex, cellIndex, cell); - } - } else if (node instanceof CellValueNode) { - tnode = new CellValueTransposedNode((CellValueNode) node, rowIndex, cellIndex, cell); - } else if (node instanceof CellKeyNode) { - tnode = new CellKeyTransposedNode((CellKeyNode) node, rowIndex, cellIndex, cell); - } - - if (tnode != null) { - tnode.load = - (parentNode2 == null || parentNode2.load) && - (link == null || link.load); - - processTransposedNode(tnode, parentNode, link, rowIndex); - - tnode.jsonContextNode = addJsonContext( - parentNode2 != null ? parentNode2.jsonContextNode : null, - link != null ? link.property.id : null, - cell, - rowIndex - ); - } - return tnode; - } - - public TransposedNode transposeTopicNode( - TransposedNode parentNode, - Link link, - FreebaseTopicNode node, - int rowIndex) { - - WritingTransposedNode parentNode2 = (WritingTransposedNode) parentNode; - WritingTransposedNode tnode = new TopicTransposedNode(node); - - tnode.load = - (parentNode2 == null || parentNode2.load) && - (link == null || link.load); - - processTransposedNode(tnode, parentNode, link, rowIndex); - - tnode.jsonContextNode = addJsonContext( - parentNode2 != null ? parentNode2.jsonContextNode : null, - link != null ? 
link.property.id : null, - node.topic - ); - - return tnode; - } - - public TransposedNode transposeValueNode( - TransposedNode parentNode, - Link link, - ValueNode node, - int rowIndex) { - - WritingTransposedNode parentNode2 = (WritingTransposedNode) parentNode; - WritingTransposedNode tnode = new ValueTransposedNode(node); - - tnode.load = - (parentNode2 == null || parentNode2.load) && - (link == null || link.load); - - processTransposedNode(tnode, parentNode, link, rowIndex); - - tnode.jsonContextNode = addJsonContext( - parentNode2 != null ? parentNode2.jsonContextNode : null, - link != null ? link.property.id : null, - node.value - ); - - return tnode; - } - - protected void processTransposedNode( - WritingTransposedNode tnode, - TransposedNode parentNode, - Link link, - int rowIndex - ) { - if (parentNode != null) { - if (parentNode instanceof TransposedNodeWithChildren) { - TransposedNodeWithChildren parentNode2 = (TransposedNodeWithChildren) parentNode; - parentNode2.rowIndices.add(rowIndex); - parentNode2.children.add(tnode); - parentNode2.links.add(link); - } - } else { - addRootNode(tnode, rowIndex); - } - } - - protected JSONObject addJsonContext(JSONObject parent, String key, Object value) { - JSONObject o = new JSONObject(); - - try { - if (value instanceof FreebaseTopic) { - FreebaseTopic topic = (FreebaseTopic) value; - o.put("id", topic.id); - o.put("name", topic.name); - } else { - o.put("v", value); - } - } catch (JSONException e) { - // ignore - } - - connectJsonContext(parent, o, key); - return o; - } - - protected JSONObject addJsonContext(JSONObject parent, String key, Cell cell, int rowIndex) { - JSONObject o = new JSONObject(); - - connectJsonContext(parent, o, key); - - try { - if (cell != null) { - o.put("v", cell.value); - if (cell.recon != null) { - o.put("recon", "rec" + cell.recon.id); - - if (cell.recon.judgment == Judgment.Matched) { - o.put("id", cell.recon.match.id); - o.put("name", cell.recon.match.name); - } - - // 
qa:display_context - { - StringBuffer sb2 = new StringBuffer(); - - sb2.append("{ \"ignore\" : true, \"s\" : \"rec"); - sb2.append(Long.toString(cell.recon.id)); - sb2.append("\", \"p\" : \"qa:display_context\", \"o\" : \"ctx"); - sb2.append(Long.toString(contextID)); - sb2.append("\", \"meta\" : { \"row\" : "); - sb2.append(Integer.toString(rowIndex)); - sb2.append(" } }"); - - writeLine(sb2.toString()); - } - } - } - } catch (JSONException e) { - // ignore - } - - return o; - } - - protected void connectJsonContext(JSONObject parent, JSONObject o, String key) { - try { - if (parent == null) { - contextTreeRoot = o; - } else { - JSONArray a = null; - if (parent.has(key)) { - a = parent.getJSONArray(key); - } else { - a = new JSONArray(); - parent.put(key, a); - } - - a.put(o); - } - } catch (JSONException e) { - // ignore - } - } - - protected void addRootNode(WritingTransposedNode tnode, int rowIndex) { - if (lastRootNode != null) { - lastRootNode.write(null, null, project, -1, -1, null); - writeContextTreeNode(); - } - lastRootNode = tnode; - - contextTreeRoot = null; - contextRowIndex = rowIndex; - contextRefCount = 0; - contextID++; - } - - protected void writeContextTreeNode() { - if (contextTreeRoot != null && contextRefCount > 0) { - StringBuffer sb = new StringBuffer(); - - sb.append("{ \"ignore\" : true, \"s\" : \"ctx"); - sb.append(Long.toString(contextID)); - sb.append("\", \"p\" : \"qa:context_data\", \"o\" : { \"row\" : "); - sb.append(Integer.toString(contextRowIndex)); - sb.append(", \"data\" : "); - sb.append(contextTreeRoot.toString()); - sb.append(" } }"); - - writeLine(sb.toString()); - } - } -} diff --git a/main/src/com/google/gridworks/sorting/BaseSorter.java b/main/src/com/google/gridworks/sorting/BaseSorter.java deleted file mode 100644 index ffe278216..000000000 --- a/main/src/com/google/gridworks/sorting/BaseSorter.java +++ /dev/null @@ -1,140 +0,0 @@ -package com.google.gridworks.sorting; - -import java.util.List; - -import 
org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.model.Project; -import com.google.gridworks.sorting.Criterion.KeyMaker; - -abstract public class BaseSorter { - protected Criterion[] _criteria; - protected KeyMaker[] _keyMakers; - protected ComparatorWrapper[] _comparatorWrappers; - protected List _keys; - - public class ComparatorWrapper { - final public int criterionIndex; - final protected int multiplier; - - public ComparatorWrapper(int criterionIndex) { - this.criterionIndex = criterionIndex; - this.multiplier = _criteria[criterionIndex].reverse ? -1 : 1; - } - - public Object getKey(Project project, Object o, int index) { - while (index >= _keys.size()) { - _keys.add(null); - } - - Object[] keys = _keys.get(index); - if (keys == null) { - keys = makeKeys(project, o, index); - _keys.set(index, keys); - } - return keys[criterionIndex]; - } - - public int compare(Project project, Object o1, int i1, Object o2, int i2) { - Criterion c = _criteria[criterionIndex]; - Object key1 = getKey(project, o1, i1); - Object key2 = getKey(project, o2, i2); - - if (key1 == null) { - if (key2 == null) { - return 0; - } else if (key2 instanceof EvalError) { - return c.blankPosition - c.errorPosition; - } else { - return c.blankPosition; - } - } else if (key1 instanceof EvalError) { - if (key2 == null) { - return c.errorPosition - c.blankPosition; - } else if (key2 instanceof EvalError) { - return 0; - } else { - return c.errorPosition; - } - } else { - if (key2 == null) { - return -c.blankPosition; - } else if (key2 instanceof EvalError) { - return -c.errorPosition; - } else { - return _keyMakers[criterionIndex].compareKeys(key1, key2) * multiplier; - } - } - } - } - - public void initializeFromJSON(Project project, JSONObject obj) throws JSONException { - if (obj.has("criteria") && !obj.isNull("criteria")) { - JSONArray a = obj.getJSONArray("criteria"); - int count = 
a.length(); - - _criteria = new Criterion[count]; - _keyMakers = new KeyMaker[count]; - _comparatorWrappers = new ComparatorWrapper[count]; - - for (int i = 0; i < count; i++) { - JSONObject obj2 = a.getJSONObject(i); - - _criteria[i] = createCriterionFromJSON(project, obj2); - _keyMakers[i] = _criteria[i].createKeyMaker(); - _comparatorWrappers[i] = new ComparatorWrapper(i); - } - } else { - _criteria = new Criterion[0]; - _keyMakers = new KeyMaker[0]; - _comparatorWrappers = new ComparatorWrapper[0]; - } - } - - public boolean hasCriteria() { - return _criteria != null && _criteria.length > 0; - } - - protected Criterion createCriterionFromJSON(Project project, JSONObject obj) throws JSONException { - String valueType = "string"; - if (obj.has("valueType") && !obj.isNull("valueType")) { - valueType = obj.getString("valueType"); - } - - Criterion c = null; - if ("boolean".equals(valueType)) { - c = new BooleanCriterion(); - } else if ("date".equals(valueType)) { - c = new DateCriterion(); - } else if ("number".equals(valueType)) { - c = new NumberCriterion(); - } else { - c = new StringCriterion(); - } - - c.initializeFromJSON(project, obj); - return c; - } - - abstract protected Object makeKey( - Project project, KeyMaker keyMaker, Criterion c, Object o, int index); - - protected Object[] makeKeys(Project project, Object o, int index) { - Object[] keys = new Object[_keyMakers.length]; - for (int i = 0; i < keys.length; i++) { - keys[i] = makeKey(project, _keyMakers[i], _criteria[i], o, index); - } - return keys; - } - - protected int compare(Project project, Object o1, int i1, Object o2, int i2) { - int c = 0; - for (int i = 0; c == 0 && i < _comparatorWrappers.length; i++) { - c = _comparatorWrappers[i].compare(project, o1, i1, o2, i2); - } - return c; - } -} diff --git a/main/src/com/google/gridworks/sorting/BooleanCriterion.java b/main/src/com/google/gridworks/sorting/BooleanCriterion.java deleted file mode 100644 index 2df57d2c3..000000000 --- 
a/main/src/com/google/gridworks/sorting/BooleanCriterion.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.google.gridworks.sorting; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.ExpressionUtils; - -public class BooleanCriterion extends Criterion { - final static protected EvalError s_error = new EvalError("Not a boolean"); - - @Override - public KeyMaker createKeyMaker() { - return new KeyMaker() { - @Override - protected Object makeKey(Object value) { - if (ExpressionUtils.isNonBlankData(value)) { - if (value instanceof Boolean) { - return value; - } else if (value instanceof String) { - return Boolean.parseBoolean((String) value); - } else { - return s_error; - } - } - return value; - } - - @Override - public int compareKeys(Object key1, Object key2) { - return ((Boolean) key1).compareTo((Boolean) key2); - } - }; - } -} diff --git a/main/src/com/google/gridworks/sorting/Criterion.java b/main/src/com/google/gridworks/sorting/Criterion.java deleted file mode 100644 index 862a3469d..000000000 --- a/main/src/com/google/gridworks/sorting/Criterion.java +++ /dev/null @@ -1,94 +0,0 @@ -package com.google.gridworks.sorting; - -import org.json.JSONException; -import org.json.JSONObject; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.model.Row; - -abstract public class Criterion { - public String columnName; - protected int cellIndex; - - // These take on positive and negative values to indicate where blanks and errors - // go relative to non-blank values. They are also relative to each another. - // Blanks and errors are not affected by the reverse flag. 
- public int blankPosition = 1; - public int errorPosition = 2; - - public boolean reverse; - - public void initializeFromJSON(Project project, JSONObject obj) throws JSONException { - if (obj.has("column") && !obj.isNull("column")) { - columnName = obj.getString("column"); - - Column column = project.columnModel.getColumnByName(columnName); - cellIndex = column != null ? column.getCellIndex() : -1; - } - - if (obj.has("blankPosition") && !obj.isNull("blankPosition")) { - blankPosition = obj.getInt("blankPosition"); - } - if (obj.has("errorPosition") && !obj.isNull("errorPosition")) { - errorPosition = obj.getInt("errorPosition"); - } - - if (obj.has("reverse") && !obj.isNull("reverse")) { - reverse = obj.getBoolean("reverse"); - } - } - - abstract public class KeyMaker { - public Object makeKey(Project project, Record record) { - Object error = null; - Object finalKey = null; - - for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { - Object key = makeKey(project, project.rows.get(r), r); - if (ExpressionUtils.isError(key)) { - error = key; - } else if (ExpressionUtils.isNonBlankData(key)) { - if (finalKey == null) { - finalKey = key; - } else { - int c = compareKeys(finalKey, key); - if (reverse) { - if (c < 0) { // key > finalKey - finalKey = key; - } - } else { - if (c > 0) { // key < finalKey - finalKey = key; - } - } - } - } - } - - if (finalKey != null) { - return finalKey; - } else if (error != null) { - return error; - } else { - return null; - } - } - - public Object makeKey(Project project, Row row, int rowIndex) { - if (cellIndex < 0) { - return null; - } else { - Object value = row.getCellValue(cellIndex); - return makeKey(value); - } - } - - abstract public int compareKeys(Object key1, Object key2); - - abstract protected Object makeKey(Object value); - } - abstract public KeyMaker createKeyMaker(); -} diff --git a/main/src/com/google/gridworks/sorting/DateCriterion.java b/main/src/com/google/gridworks/sorting/DateCriterion.java deleted file 
mode 100644 index 28de3953f..000000000 --- a/main/src/com/google/gridworks/sorting/DateCriterion.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.google.gridworks.sorting; - -import java.util.Calendar; -import java.util.Date; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.ExpressionUtils; - -public class DateCriterion extends Criterion { - final static protected EvalError s_error = new EvalError("Not a date"); - - @Override - public KeyMaker createKeyMaker() { - return new KeyMaker() { - @Override - protected Object makeKey(Object value) { - if (ExpressionUtils.isNonBlankData(value)) { - if (value instanceof Date) { - return value; - } else if (value instanceof Calendar) { - return ((Calendar) value).getTime(); - } else { - return s_error; - } - } - return value; - } - - @Override - public int compareKeys(Object key1, Object key2) { - return ((Date) key1).compareTo((Date) key2); - } - }; - } -} diff --git a/main/src/com/google/gridworks/sorting/NumberCriterion.java b/main/src/com/google/gridworks/sorting/NumberCriterion.java deleted file mode 100644 index cefe821cc..000000000 --- a/main/src/com/google/gridworks/sorting/NumberCriterion.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.google.gridworks.sorting; - -import java.util.Calendar; -import java.util.Date; - -import com.google.gridworks.expr.EvalError; -import com.google.gridworks.expr.ExpressionUtils; - -public class NumberCriterion extends Criterion { - - final static protected EvalError s_error = new EvalError("Not a number"); - - @Override - public KeyMaker createKeyMaker() { - return new KeyMaker() { - @Override - protected Object makeKey(Object value) { - if (ExpressionUtils.isNonBlankData(value)) { - if (value instanceof Number) { - return value; - } else if (value instanceof Boolean) { - return ((Boolean) value).booleanValue() ? 
1 : 0; - } else if (value instanceof Date) { - return ((Date) value).getTime(); - } else if (value instanceof Calendar) { - return ((Calendar) value).getTime().getTime(); - } else if (value instanceof String) { - try { - double d = Double.parseDouble((String) value); - if (!Double.isNaN(d)) { - return d; - } - } catch (NumberFormatException e) { - // fall through - } - } - return s_error; - } - return value; - } - - @Override - public int compareKeys(Object key1, Object key2) { - double d1 = ((Number) key1).doubleValue(); - double d2 = ((Number) key2).doubleValue(); - return d1 < d2 ? -1 : (d1 > d2 ? 1 : 0); - } - }; - } -} diff --git a/main/src/com/google/gridworks/sorting/SortingRecordVisitor.java b/main/src/com/google/gridworks/sorting/SortingRecordVisitor.java deleted file mode 100644 index dda891f38..000000000 --- a/main/src/com/google/gridworks/sorting/SortingRecordVisitor.java +++ /dev/null @@ -1,65 +0,0 @@ -package com.google.gridworks.sorting; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; - -import com.google.gridworks.browsing.RecordVisitor; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.sorting.Criterion.KeyMaker; - -public class SortingRecordVisitor extends BaseSorter implements RecordVisitor { - final protected RecordVisitor _visitor; - protected List _records; - - public SortingRecordVisitor(RecordVisitor visitor) { - _visitor = visitor; - } - - @Override - public void start(Project project) { - int count = project.recordModel.getRecordCount(); - _records = new ArrayList(count); - _keys = new ArrayList(count); - } - - @Override - public void end(Project project) { - _visitor.start(project); - - Collections.sort(_records, new Comparator() { - Project project; - - Comparator init(Project project) { - this.project = project; - return this; - } - - @Override - public int compare(Record o1, Record o2) { - return 
SortingRecordVisitor.this.compare(project, o1, o1.recordIndex, o2, o2.recordIndex); - } - }.init(project)); - - for (Record record : _records) { - _visitor.visit(project, record); - } - - _visitor.end(project); - } - - @Override - public boolean visit(Project project, Record record) { - _records.add(record); - return false; - } - - @Override - protected Object makeKey( - Project project, KeyMaker keyMaker, Criterion c, Object o, int index) { - - return keyMaker.makeKey(project, (Record) o); - } -} diff --git a/main/src/com/google/gridworks/sorting/SortingRowVisitor.java b/main/src/com/google/gridworks/sorting/SortingRowVisitor.java deleted file mode 100644 index 0bbfe79c6..000000000 --- a/main/src/com/google/gridworks/sorting/SortingRowVisitor.java +++ /dev/null @@ -1,75 +0,0 @@ -package com.google.gridworks.sorting; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; - -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.sorting.Criterion.KeyMaker; - -public class SortingRowVisitor extends BaseSorter implements RowVisitor { - final protected RowVisitor _visitor; - protected List _indexedRows; - - static protected class IndexedRow { - final int index; - final Row row; - - IndexedRow(int index, Row row) { - this.index = index; - this.row = row; - } - } - - public SortingRowVisitor(RowVisitor visitor) { - _visitor = visitor; - } - - @Override - public void start(Project project) { - int count = project.rows.size(); - _indexedRows = new ArrayList(count); - _keys = new ArrayList(count); - } - - @Override - public void end(Project project) { - _visitor.start(project); - - Collections.sort(_indexedRows, new Comparator() { - Project project; - - Comparator init(Project project) { - this.project = project; - return this; - } - - @Override - public int compare(IndexedRow o1, IndexedRow o2) { - return 
SortingRowVisitor.this.compare(project, o1.row, o1.index, o2.row, o2.index); - } - }.init(project)); - - for (IndexedRow indexedRow : _indexedRows) { - _visitor.visit(project, indexedRow.index, indexedRow.row); - } - - _visitor.end(project); - } - - @Override - public boolean visit(Project project, int rowIndex, Row row) { - _indexedRows.add(new IndexedRow(rowIndex, row)); - return false; - } - - @Override - protected Object makeKey( - Project project, KeyMaker keyMaker, Criterion c, Object o, int index) { - - return keyMaker.makeKey(project, (Row) o, index); - } -} diff --git a/main/src/com/google/gridworks/sorting/StringCriterion.java b/main/src/com/google/gridworks/sorting/StringCriterion.java deleted file mode 100644 index 719523e36..000000000 --- a/main/src/com/google/gridworks/sorting/StringCriterion.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.google.gridworks.sorting; - -import org.json.JSONException; -import org.json.JSONObject; - -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Project; - -public class StringCriterion extends Criterion { - public boolean caseSensitive; - - @Override - public void initializeFromJSON(Project project, JSONObject obj) throws JSONException { - super.initializeFromJSON(project, obj); - - if (obj.has("caseSensitive") && !obj.isNull("caseSensitive")) { - caseSensitive = obj.getBoolean("caseSensitive"); - } - } - - @Override - public KeyMaker createKeyMaker() { - return new KeyMaker() { - @Override - protected Object makeKey(Object value) { - return (ExpressionUtils.isNonBlankData(value) && !(value instanceof String)) ? 
- value.toString() : value; - } - - @Override - public int compareKeys(Object key1, Object key2) { - if (StringCriterion.this.caseSensitive) { - return ((String) key1).compareTo((String) key2); - } else { - return ((String) key1).compareToIgnoreCase((String) key2); - } - } - }; - } -} diff --git a/main/src/com/google/gridworks/templating/DynamicFragment.java b/main/src/com/google/gridworks/templating/DynamicFragment.java deleted file mode 100644 index f274bf302..000000000 --- a/main/src/com/google/gridworks/templating/DynamicFragment.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.google.gridworks.templating; - -import com.google.gridworks.expr.Evaluable; - -class DynamicFragment extends Fragment { - final public Evaluable eval; - - public DynamicFragment(Evaluable eval) { - this.eval = eval; - } -} diff --git a/main/src/com/google/gridworks/templating/Fragment.java b/main/src/com/google/gridworks/templating/Fragment.java deleted file mode 100644 index 07f483433..000000000 --- a/main/src/com/google/gridworks/templating/Fragment.java +++ /dev/null @@ -1,5 +0,0 @@ -package com.google.gridworks.templating; - -public class Fragment { - -} diff --git a/main/src/com/google/gridworks/templating/Parser.java b/main/src/com/google/gridworks/templating/Parser.java deleted file mode 100644 index 35183bdc0..000000000 --- a/main/src/com/google/gridworks/templating/Parser.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.google.gridworks.templating; - -import java.util.ArrayList; -import java.util.List; - -import com.google.gridworks.expr.MetaParser; -import com.google.gridworks.expr.ParsingException; -import com.google.gridworks.gel.ast.FieldAccessorExpr; -import com.google.gridworks.gel.ast.VariableExpr; - -public class Parser { - static public Template parse(String s) throws ParsingException { - List fragments = new ArrayList(); - - int start = 0, current = 0; - while (current < s.length() - 1) { - char c = s.charAt(current); - if (c == '\\') { - current += 2; - continue; - } 
- - char c2 = s.charAt(current + 1); - if (c == '$' && c2 == '{') { - int closeBrace = s.indexOf('}', current + 2); - if (closeBrace > current + 1) { - String columnName = s.substring(current + 2, closeBrace); - - if (current > start) { - fragments.add(new StaticFragment(s.substring(start, current))); - } - start = current = closeBrace + 1; - - fragments.add( - new DynamicFragment( - new FieldAccessorExpr( - new FieldAccessorExpr( - new VariableExpr("cells"), - columnName), - "value"))); - - continue; - } - } else if (c == '{' && c2 == '{') { - int closeBrace = s.indexOf('}', current + 2); - if (closeBrace > current + 1 && closeBrace < s.length() - 1 && s.charAt(closeBrace + 1) == '}') { - String expression = s.substring(current + 2, closeBrace); - - if (current > start) { - fragments.add(new StaticFragment(s.substring(start, current))); - } - start = current = closeBrace + 2; - - fragments.add( - new DynamicFragment( - MetaParser.parse(expression))); - - continue; - } - } - - current++; - } - - if (start < s.length()) { - fragments.add(new StaticFragment(s.substring(start))); - } - - return new Template(fragments); - } -} diff --git a/main/src/com/google/gridworks/templating/StaticFragment.java b/main/src/com/google/gridworks/templating/StaticFragment.java deleted file mode 100644 index 3db574f08..000000000 --- a/main/src/com/google/gridworks/templating/StaticFragment.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.google.gridworks.templating; - -class StaticFragment extends Fragment { - final public String text; - - public StaticFragment(String text) { - this.text = text; - } -} diff --git a/main/src/com/google/gridworks/templating/Template.java b/main/src/com/google/gridworks/templating/Template.java deleted file mode 100644 index 9cb48a87a..000000000 --- a/main/src/com/google/gridworks/templating/Template.java +++ /dev/null @@ -1,173 +0,0 @@ -package com.google.gridworks.templating; - -import java.io.IOException; -import java.io.Writer; -import 
java.util.Collection; -import java.util.List; -import java.util.Properties; - -import com.google.gridworks.browsing.RecordVisitor; -import com.google.gridworks.browsing.RowVisitor; -import com.google.gridworks.expr.ExpressionUtils; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Record; -import com.google.gridworks.model.Row; - -public class Template { - protected String _prefix; - protected String _suffix; - protected String _separator; - - protected List _fragments; - - public Template(List fragments) { - _fragments = fragments; - } - - public void setPrefix(String prefix) { - _prefix = prefix; - } - - public void setSuffix(String suffix) { - _suffix = suffix; - } - - public void setSeparator(String separator) { - _separator = separator; - } - - public RowVisitor getRowVisitor(Writer writer, int limit) { - return get(writer, limit); - } - - public RecordVisitor getRecordVisitor(Writer writer, int limit) { - return get(writer, limit); - } - - protected RowWritingVisitor get(Writer writer, int limit) { - return new RowWritingVisitor(writer, limit); - } - - protected class RowWritingVisitor implements RowVisitor, RecordVisitor { - final protected int limit; - final protected Writer writer; - protected Properties bindings; - - public int total; - - public RowWritingVisitor(Writer writer, int limit) { - this.limit = limit; - this.writer = writer; - } - - @Override - public void start(Project project) { - bindings = ExpressionUtils.createBindings(project); - - try { - if (_prefix != null) { - writer.write(_prefix); - } - } catch (IOException e) { - // ignore - } - } - - @Override - public void end(Project project) { - try { - if (_suffix != null) { - writer.write(_suffix); - } - } catch (IOException e) { - // ignore - } - } - - public boolean visit(Project project, int rowIndex, Row row) { - if (limit <= 0 || total < limit) { - internalVisit(project, rowIndex, row); - } - total++; - - return limit > 0 && total >= limit; - } - - @Override 
- public boolean visit(Project project, Record record) { - if (limit <= 0 || total < limit) { - internalVisit(project, record); - } - total++; - - return limit > 0 && total >= limit; - } - - protected void writeValue(Object v) throws IOException { - if (v == null) { - writer.write("null"); - } else if (ExpressionUtils.isError(v)) { - writer.write("null"); - //writer.write("[Error: " + ((EvalError) v).message); - } else if (v instanceof String) { - writer.write((String) v); - } else { - writer.write(v.toString()); - } - } - - public boolean internalVisit(Project project, int rowIndex, Row row) { - try { - if (total > 0 && _separator != null) { - writer.write(_separator); - } - - ExpressionUtils.bind(bindings, row, rowIndex, null, null); - for (Fragment f : _fragments) { - if (f instanceof StaticFragment) { - writer.write(((StaticFragment) f).text); - } else { - DynamicFragment df = (DynamicFragment) f; - Object value = df.eval.evaluate(bindings); - - if (value != null && ExpressionUtils.isArrayOrCollection(value)) { - if (ExpressionUtils.isArray(value)) { - Object[] a = (Object[]) value; - for (Object v : a) { - writeValue(v); - } - } else { - Collection a = ExpressionUtils.toObjectCollection(value); - for (Object v : a) { - writeValue(v); - } - } - continue; - } - - writeValue(value); - } - } - } catch (IOException e) { - // ignore - } - return false; - } - - protected boolean internalVisit(Project project, Record record) { - bindings.put("recordIndex", record.recordIndex); - - for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { - Row row = project.rows.get(r); - - bindings.put("rowIndex", r); - - internalVisit(project, r, row); - - bindings.remove("recordIndex"); - } - return false; - } - } - -} diff --git a/main/src/com/google/gridworks/util/CookiesUtilities.java b/main/src/com/google/gridworks/util/CookiesUtilities.java deleted file mode 100644 index 501e277c2..000000000 --- a/main/src/com/google/gridworks/util/CookiesUtilities.java +++ /dev/null @@ 
-1,48 +0,0 @@ -package com.google.gridworks.util; - -import javax.servlet.http.Cookie; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -public class CookiesUtilities { - - public static final String DOMAIN = "127.0.0.1"; - public static final String PATH = "/"; - - public static Cookie getCookie(HttpServletRequest request, String name) { - if (name == null) throw new RuntimeException("cookie name cannot be null"); - Cookie cookie = null; - Cookie[] cookies = request.getCookies(); - if (cookies != null) { - for (Cookie c : cookies) { - if (name.equals(c.getName())) { - cookie = c; - } - } - } - return cookie; - } - - public static void setCookie(HttpServletRequest request, HttpServletResponse response, String name, String value, int max_age) { - Cookie c = new Cookie(name, value); - c.setDomain(getDomain(request)); - c.setPath(PATH); - c.setMaxAge(max_age); - response.addCookie(c); - } - - public static void deleteCookie(HttpServletRequest request, HttpServletResponse response, String name) { - Cookie c = new Cookie(name, ""); - c.setDomain(getDomain(request)); - c.setPath(PATH); - c.setMaxAge(0); - response.addCookie(c); - } - - public static String getDomain(HttpServletRequest request) { - String host = request.getHeader("Host"); - if (host == null) return DOMAIN; - int index = host.indexOf(':'); - return (index > -1) ? 
host.substring(0,index) : host ; - } -} diff --git a/main/src/com/google/gridworks/util/FreebaseDataExtensionJob.java b/main/src/com/google/gridworks/util/FreebaseDataExtensionJob.java deleted file mode 100644 index a49b190b1..000000000 --- a/main/src/com/google/gridworks/util/FreebaseDataExtensionJob.java +++ /dev/null @@ -1,415 +0,0 @@ -/** - * - */ -package com.google.gridworks.util; - -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.Serializable; -import java.io.StringWriter; -import java.io.Writer; -import java.net.URL; -import java.net.URLConnection; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -import com.google.gridworks.model.ReconCandidate; -import com.google.gridworks.protograph.FreebaseType; - -public class FreebaseDataExtensionJob { - static public class DataExtension { - final public Object[][] data; - - public DataExtension(Object[][] data) { - this.data = data; - } - } - - static public class ColumnInfo { - final public List names; - final public List path; - final public FreebaseType expectedType; - - protected ColumnInfo(List names, List path, FreebaseType expectedType) { - this.names = names; - this.path = path; - this.expectedType = expectedType; - } - } - - final public JSONObject extension; - final public int columnCount; - final public List columns = new ArrayList(); - - public FreebaseDataExtensionJob(JSONObject obj) throws JSONException { - this.extension = obj; - this.columnCount = (obj.has("properties") && !obj.isNull("properties")) ? 
- countColumns(obj.getJSONArray("properties"), columns, new ArrayList(), new ArrayList()) : 0; - } - - public Map extend( - Set ids, - Map reconCandidateMap - ) throws Exception { - StringWriter writer = new StringWriter(); - formulateQuery(ids, extension, writer); - - String query = writer.toString(); - InputStream is = doMqlRead(query); - try { - String s = ParsingUtilities.inputStreamToString(is); - JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); - - Map map = new HashMap(); - if (o.has("result")) { - JSONArray a = o.getJSONArray("result"); - int l = a.length(); - - for (int i = 0; i < l; i++) { - JSONObject o2 = a.getJSONObject(i); - String id = o2.getString("id"); - FreebaseDataExtensionJob.DataExtension ext = collectResult(o2, reconCandidateMap); - - if (ext != null) { - map.put(id, ext); - } - } - } - - return map; - } finally { - is.close(); - } - } - - protected FreebaseDataExtensionJob.DataExtension collectResult( - JSONObject obj, - Map reconCandidateMap - ) throws JSONException { - List rows = new ArrayList(); - - collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, reconCandidateMap); - - Object[][] data = new Object[rows.size()][columnCount]; - rows.toArray(data); - - return new DataExtension(data); - } - - protected void storeCell( - List rows, - int row, - int col, - Object value, - Map reconCandidateMap - ) { - while (row >= rows.size()) { - rows.add(new Object[columnCount]); - } - rows.get(row)[col] = value; - } - - protected void storeCell( - List rows, - int row, - int col, - JSONObject obj, - Map reconCandidateMap - ) throws JSONException { - String id = obj.getString("id"); - ReconCandidate rc; - if (reconCandidateMap.containsKey(id)) { - rc = reconCandidateMap.get(id); - } else { - rc = new ReconCandidate( - obj.getString("id"), - obj.getString("name"), - JSONUtilities.getStringArray(obj, "type"), - 100 - ); - - reconCandidateMap.put(id, rc); - } - - storeCell(rows, row, col, rc, reconCandidateMap); - } - - 
protected int[] collectResult( - List rows, - JSONObject extNode, - JSONObject resultNode, - int startRowIndex, - int startColumnIndex, - Map reconCandidateMap - ) throws JSONException { - String propertyID = extNode.getString("id"); - String expectedTypeID = extNode.getJSONObject("expected").getString("id"); - - JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ? - resultNode.getJSONArray(propertyID) : null; - - if (expectedTypeID.startsWith("/type/")) { - if (a != null) { - int l = a.length(); - for (int r = 0; r < l; r++) { - Object o = a.isNull(r) ? null : a.get(r); - if (o instanceof Serializable) { - storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap); - } - } - } - - // note that we still take up a column even if we don't have any data - return new int[] { startRowIndex, startColumnIndex + 1 }; - } else { - boolean hasSubProperties = (extNode.has("properties") && !extNode.isNull("properties")); - boolean isOwnColumn = !hasSubProperties || (extNode.has("included") && extNode.getBoolean("included")); - - if (a != null && a.length() > 0) { - int maxColIndex = startColumnIndex; - - int l = a.length(); - for (int r = 0; r < l; r++) { - Object v = a.isNull(r) ? null : a.get(r); - JSONObject o = v != null && v instanceof JSONObject ? 
(JSONObject) v : null; - - int startColumnIndex2 = startColumnIndex; - int startRowIndex2 = startRowIndex; - - if (isOwnColumn) { - if (o != null) { - storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap); - } else { - storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap); - } - } - - if (hasSubProperties && o != null) { - int[] rowcol = collectResult( - rows, - extNode.getJSONArray("properties"), - o, - startRowIndex, - startColumnIndex2, - reconCandidateMap - ); - - startRowIndex2 = rowcol[0]; - startColumnIndex2 = rowcol[1]; - } - - startRowIndex = startRowIndex2; - maxColIndex = Math.max(maxColIndex, startColumnIndex2); - } - - return new int[] { startRowIndex, maxColIndex }; - } else { - return new int[] { - startRowIndex, - startColumnIndex + countColumns(extNode, null, new ArrayList(), new ArrayList()) - }; - } - } - } - - protected int[] collectResult( - List rows, - JSONArray subProperties, - JSONObject resultNode, - int startRowIndex, - int startColumnIndex, - Map reconCandidateMap - ) throws JSONException { - int maxStartRowIndex = startRowIndex; - - int k = subProperties.length(); - for (int c = 0; c < k; c++) { - int[] rowcol = collectResult( - rows, - subProperties.getJSONObject(c), - resultNode, - startRowIndex, - startColumnIndex, - reconCandidateMap - ); - - maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]); - startColumnIndex = rowcol[1]; - } - - return new int[] { maxStartRowIndex, startColumnIndex }; - } - - - static protected InputStream doMqlRead(String query) throws IOException { - URL url = new URL("http://api.freebase.com/api/service/mqlread"); - - URLConnection connection = url.openConnection(); - connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); - connection.setConnectTimeout(5000); - connection.setDoOutput(true); - - DataOutputStream dos = new DataOutputStream(connection.getOutputStream()); - try { - String body = "extended=1&query=" + 
ParsingUtilities.encode(query); - - dos.writeBytes(body); - } finally { - dos.flush(); - dos.close(); - } - - connection.connect(); - - return connection.getInputStream(); - } - - static protected void formulateQuery(Set ids, JSONObject node, Writer writer) throws JSONException { - JSONWriter jsonWriter = new JSONWriter(writer); - - jsonWriter.object(); - jsonWriter.key("query"); - jsonWriter.array(); - jsonWriter.object(); - - jsonWriter.key("id"); jsonWriter.value(null); - jsonWriter.key("id|="); - jsonWriter.array(); - for (String id : ids) { - if (id != null) { - jsonWriter.value(id); - } - } - jsonWriter.endArray(); - - formulateQueryNode(node.getJSONArray("properties"), jsonWriter); - - jsonWriter.endObject(); - jsonWriter.endArray(); - jsonWriter.endObject(); - } - - static protected void formulateQueryNode(JSONObject node, JSONWriter writer) throws JSONException { - String propertyID = node.getString("id"); - String expectedTypeID = node.getJSONObject("expected").getString("id"); - - writer.key(propertyID); - writer.array(); - { - if (!expectedTypeID.startsWith("/type/")) { // not literal - writer.object(); - writer.key("optional"); writer.value(true); - - boolean hasLimit = false; - if (node.has("constraints") && !node.isNull("constraints")) { - JSONObject constraints = node.getJSONObject("constraints"); - - String[] names = JSONObject.getNames(constraints); - for (String name : names) { - Object value = constraints.get(name); - if (name.equals("limit")) { - hasLimit = true; - } - - if (!name.contains(":") && - !name.equals("limit") && - !name.equals("optional") && - !name.equals("count") && - !name.equals("estimate-count") && - !name.equals("sort") && - !name.equals("return")) { - - if (name.startsWith("!")) { - name = "!c:" + name.substring(1); - } else { - name = "c:" + name; - } - } - writer.key(name); - writer.value(value); - } - } - if (!hasLimit) { - writer.key("limit"); writer.value(10); - } - - { - boolean hasSubProperties = 
(node.has("properties") && !node.isNull("properties")); - - if (!hasSubProperties || (node.has("included") && node.getBoolean("included"))) { - writer.key("name"); writer.value(null); - writer.key("id"); writer.value(null); - writer.key("type"); writer.array(); writer.endArray(); - } - - if (hasSubProperties) { - formulateQueryNode(node.getJSONArray("properties"), writer); - } - } - writer.endObject(); - } - } - writer.endArray(); - } - - static protected void formulateQueryNode(JSONArray propertiesA, JSONWriter writer) throws JSONException { - int l = propertiesA.length(); - - for (int i = 0; i < l; i++) { - formulateQueryNode(propertiesA.getJSONObject(i), writer); - } - } - - static protected int countColumns(JSONObject obj, List columns, List names, List path) throws JSONException { - String name = obj.getString("name"); - - List names2 = null; - List path2 = null; - if (columns != null) { - names2 = new ArrayList(names); - names2.add(name); - - path2 = new ArrayList(path); - path2.add(obj.getString("id")); - } - - if (obj.has("properties") && !obj.isNull("properties")) { - boolean included = (obj.has("included") && obj.getBoolean("included")); - if (included && columns != null) { - JSONObject expected = obj.getJSONObject("expected"); - - columns.add(new ColumnInfo(names2, path2, - new FreebaseType(expected.getString("id"), expected.getString("name")))); - } - - return (included ? 
1 : 0) + - countColumns(obj.getJSONArray("properties"), columns, names2, path2); - } else { - if (columns != null) { - JSONObject expected = obj.getJSONObject("expected"); - - columns.add(new ColumnInfo(names2, path2, - new FreebaseType(expected.getString("id"), expected.getString("name")))); - } - return 1; - } - } - - static protected int countColumns(JSONArray a, List columns, List names, List path) throws JSONException { - int c = 0; - int l = a.length(); - for (int i = 0; i < l; i++) { - c += countColumns(a.getJSONObject(i), columns, names, path); - } - return c; - } -} \ No newline at end of file diff --git a/main/src/com/google/gridworks/util/FreebaseUtils.java b/main/src/com/google/gridworks/util/FreebaseUtils.java deleted file mode 100644 index 9564e5622..000000000 --- a/main/src/com/google/gridworks/util/FreebaseUtils.java +++ /dev/null @@ -1,251 +0,0 @@ -package com.google.gridworks.util; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import javax.servlet.http.HttpServletRequest; - -import oauth.signpost.OAuthConsumer; -import oauth.signpost.exception.OAuthCommunicationException; -import oauth.signpost.exception.OAuthExpectationFailedException; -import oauth.signpost.exception.OAuthMessageSignerException; - -import org.apache.http.Header; -import org.apache.http.HttpResponse; -import org.apache.http.NameValuePair; -import org.apache.http.client.ClientProtocolException; -import org.apache.http.client.HttpClient; -import org.apache.http.client.entity.UrlEncodedFormEntity; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.impl.client.DefaultHttpClient; -import org.apache.http.message.BasicNameValuePair; -import org.apache.http.params.CoreProtocolPNames; -import org.apache.http.util.EntityUtils; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -import com.google.gridworks.GridworksServlet; -import 
com.google.gridworks.ProjectManager; -import com.google.gridworks.oauth.Credentials; -import com.google.gridworks.oauth.OAuthUtilities; -import com.google.gridworks.oauth.Provider; - -public class FreebaseUtils { - - static final public String FREEBASE_HOST = "www.freebase.com"; - static final public String FREEBASE_SANDBOX_HOST = "www.sandbox-freebase.com"; - - static final private String FREEQ_URL = "http://data.labs.freebase.com/freeq/gridworks"; - - static final private String GRIDWORKS_ID = "/en/gridworks"; - - private static String getUserInfoURL(String host) { - return "http://" + host + "/api/service/user_info"; - } - - private static String getMQLWriteURL(String host) { - return "http://" + host + "/api/service/mqlwrite"; - } - - private static String getMQLReadURL(String host) { - return "http://" + host + "/api/service/mqlread"; - } - - public static String getUserInfo(Credentials credentials, Provider provider) - throws OAuthMessageSignerException, OAuthExpectationFailedException, OAuthCommunicationException, ClientProtocolException, IOException { - - OAuthConsumer consumer = OAuthUtilities.getConsumer(credentials, provider); - - HttpGet httpRequest = new HttpGet(getUserInfoURL(provider.getHost())); - httpRequest.getParams().setParameter(CoreProtocolPNames.USER_AGENT, "Gridworks " + GridworksServlet.getVersion()); - - // this is required by the Metaweb API to avoid XSS - httpRequest.setHeader("X-Requested-With", "1"); - - // sign the request with the oauth library - consumer.sign(httpRequest); - - // execute the request - HttpClient httpClient = new DefaultHttpClient(); - HttpResponse httpResponse = httpClient.execute(httpRequest); - - // return the results - return EntityUtils.toString(httpResponse.getEntity()); - } - - public static String getUserBadges(Provider provider, String user_id) - throws ClientProtocolException, IOException, JSONException { - - String query = "{" + - "'id' : '" + user_id + "'," + - "'!/type/usergroup/member' : [{" + - "'id' : 
null," + - "'key' : [{" + - "'namespace' : null" + - "}]" + - "}]" + - "}".replace("'", "\""); - - return mqlread(provider, query); - } - - public static String mqlread(Provider provider, String query) - throws ClientProtocolException, IOException, JSONException { - - JSONObject envelope = new JSONObject(); - envelope.put("query", new JSONObject(query)); - - List formparams = new ArrayList(); - formparams.add(new BasicNameValuePair("query", envelope.toString())); - UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8"); - - HttpPost httpRequest = new HttpPost(getMQLReadURL(provider.getHost())); - httpRequest.getParams().setParameter(CoreProtocolPNames.USER_AGENT, "Gridworks " + GridworksServlet.getVersion()); - httpRequest.setEntity(entity); - - // this is required by the Metaweb API to avoid XSS - httpRequest.setHeader("X-Requested-With", "1"); - - // execute the request - HttpClient httpClient = new DefaultHttpClient(); - HttpResponse httpResponse = httpClient.execute(httpRequest); - - // return the results - return EntityUtils.toString(httpResponse.getEntity()); - } - - public static String mqlwrite(Credentials credentials, Provider provider, String query) - throws OAuthMessageSignerException, OAuthExpectationFailedException, OAuthCommunicationException, ClientProtocolException, IOException, JSONException { - OAuthConsumer consumer = OAuthUtilities.getConsumer(credentials, provider); - - JSONObject envelope = new JSONObject(); - envelope.put("query", new JSONObject(query)); - - List formparams = new ArrayList(); - formparams.add(new BasicNameValuePair("query", envelope.toString())); - UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8"); - - HttpPost httpRequest = new HttpPost(getMQLWriteURL(provider.getHost())); - httpRequest.getParams().setParameter(CoreProtocolPNames.USER_AGENT, "Gridworks " + GridworksServlet.getVersion()); - httpRequest.setEntity(entity); - - // this is required by the Metaweb API to avoid XSS - 
httpRequest.setHeader("X-Requested-With", "1"); - - // sign the request with the oauth library - consumer.sign(httpRequest); - - // execute the request - HttpClient httpClient = new DefaultHttpClient(); - HttpResponse httpResponse = httpClient.execute(httpRequest); - - // return the results - return EntityUtils.toString(httpResponse.getEntity()); - } - - public static String uploadTriples( - HttpServletRequest request, - String graph, - String source_name, - String source_id, - String mdo_id, - String triples - ) throws OAuthMessageSignerException, OAuthExpectationFailedException, OAuthCommunicationException, ClientProtocolException, JSONException, IOException { - - Provider provider = OAuthUtilities.getProvider(FREEBASE_HOST); - - Credentials credentials = Credentials.getCredentials(request, provider, Credentials.Type.ACCESS); - - JSONObject mdo_info = new JSONObject(); - mdo_info.put("name", source_name); - if (source_id != null) { - mdo_info.put("info_source",source_id); - } - - JSONObject user_info = new JSONObject(getUserInfo(credentials, provider)); - if (user_info.has("username")) { - - String user_id = user_info.getString("id"); - boolean allowed = isAllowedToWrite(provider, graph, user_id); - - if (allowed) { - List formparams = new ArrayList(); - formparams.add(new BasicNameValuePair("user", user_info.getString("id"))); - formparams.add(new BasicNameValuePair("action_type", "LOAD_TRIPLE")); - formparams.add(new BasicNameValuePair("operator", user_info.getString("id"))); - formparams.add(new BasicNameValuePair("software_tool_used", GRIDWORKS_ID)); - formparams.add(new BasicNameValuePair("rabj", "true")); - formparams.add(new BasicNameValuePair("mdo_info", mdo_info.toString())); - formparams.add(new BasicNameValuePair("graphport", graph)); - formparams.add(new BasicNameValuePair("payload", triples)); - formparams.add(new BasicNameValuePair("check_params", "false")); - if (mdo_id != null) { - formparams.add(new BasicNameValuePair("mdo_guid", mdo_id)); - } - 
UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8"); - - HttpPost httpRequest = new HttpPost(getFreeQUrl()); - httpRequest.getParams().setParameter(CoreProtocolPNames.USER_AGENT, "Gridworks " + GridworksServlet.getVersion()); - httpRequest.setEntity(entity); - - HttpPost surrogateRequest = new HttpPost(getUserInfoURL(FREEBASE_HOST)); - surrogateRequest.setEntity(entity); - - OAuthConsumer consumer = OAuthUtilities.getConsumer(credentials, provider); - - consumer.sign(surrogateRequest); - - Header[] h = surrogateRequest.getHeaders("Authorization"); - if (h.length > 0) { - httpRequest.setHeader("X-Freebase-Credentials", h[0].getValue()); - } else { - throw new RuntimeException("Couldn't find the oauth signature header in the surrogate request"); - } - - // execute the request - HttpClient httpClient = new DefaultHttpClient(); - HttpResponse httpResponse = httpClient.execute(httpRequest); - - // return the results - return EntityUtils.toString(httpResponse.getEntity()); - } else { - throw new RuntimeException("User '" + user_id + "' is not allowed to write to '" + graph + "' with Gridworks"); - } - } else { - throw new RuntimeException("Invalid credentials"); - } - } - - private static boolean isAllowedToWrite(Provider provider, String graph, String user_id) throws JSONException, ClientProtocolException, IOException { - if ("sandbox".equals(graph)) return true; - - JSONObject user_badges = new JSONObject(getUserBadges(provider, user_id)); - JSONObject result = user_badges.getJSONObject("result"); - - if (result == null) { - throw new RuntimeException("Error evaluating badges for user '" + user_id + "'"); - } - - boolean allowed = false; - - JSONArray badges = result.getJSONArray("!/type/usergroup/member"); - for (int i = 0; i < badges.length(); i++) { - JSONObject o = badges.getJSONObject(i); - String id = o.getString("id"); - if ("/en/metaweb_staff".equals(id)) { - allowed = true; - break; - } - } - - return allowed; - } - - static public 
String getFreeQUrl() { - String url = (String) ProjectManager.singleton.getPreferenceStore().get("freebase.freeq"); - return url != null ? url : FREEQ_URL; - } -} diff --git a/main/src/com/google/gridworks/util/IOUtils.java b/main/src/com/google/gridworks/util/IOUtils.java deleted file mode 100644 index fe50d8656..000000000 --- a/main/src/com/google/gridworks/util/IOUtils.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.google.gridworks.util; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; - -public class IOUtils { - - private static final int DEFAULT_BUFFER_SIZE = 4 * 1024; - - public static long copy(InputStream input, OutputStream output) throws IOException { - byte[] buffer = new byte[DEFAULT_BUFFER_SIZE]; - long count = 0; - int n = 0; - while (-1 != (n = input.read(buffer))) { - output.write(buffer, 0, n); - count += n; - } - return count; - } - - public static long copy(InputStream input, File file) throws IOException { - FileOutputStream output = new FileOutputStream(file); - byte[] buffer = new byte[DEFAULT_BUFFER_SIZE]; - long count = 0; - int n = 0; - try { - while (-1 != (n = input.read(buffer))) { - output.write(buffer, 0, n); - count += n; - } - } finally { - try { - output.close(); - } catch (IOException e) {} - try { - input.close(); - } catch (IOException e) {} - } - return count; - } - -} diff --git a/main/src/com/google/gridworks/util/IndentWriter.java b/main/src/com/google/gridworks/util/IndentWriter.java deleted file mode 100644 index 468b17095..000000000 --- a/main/src/com/google/gridworks/util/IndentWriter.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Created on Dec 1, 2005 - * Created by dfhuynh - */ -package com.google.gridworks.util; - -import java.io.IOException; -import java.io.Writer; - -/** - * A utility for writing indented code. 
- * - * @author dfhuynh - */ -public class IndentWriter { - final static private int s_max = 20; - - static private String[] s_indents = new String[s_max]; - static { - for (int i = 0; i < s_max; i++) { - StringBuffer sb = new StringBuffer(s_max); - for (int j = 0; j < i; j++) { - sb.append('\t'); - } - s_indents[i] = sb.toString(); - } - } - - private Writer m_writer; - private int m_count = 0; - private boolean m_indent = true; - - public IndentWriter(Writer writer) { - m_writer = writer; - } - - public void close() throws IOException { - m_writer.close(); - } - - public void flush() throws IOException { - m_writer.flush(); - } - - public void print(Object o) throws IOException { - printIndent(); - m_writer.write(o.toString()); - m_indent = false; - } - - public void println() throws IOException { - printIndent(); - m_writer.write("\n"); - m_indent = true; - } - - public void println(Object o) throws IOException { - printIndent(); - m_writer.write(o.toString()); - m_writer.write("\n"); - m_indent = true; - } - - public void indent() { - m_count++; - } - - public void unindent() { - m_count--; - } - - private void printIndent() throws IOException { - if (m_indent) { - m_writer.write(s_indents[m_count]); - } - } -} diff --git a/main/src/com/google/gridworks/util/JSONUtilities.java b/main/src/com/google/gridworks/util/JSONUtilities.java deleted file mode 100644 index ef2b56212..000000000 --- a/main/src/com/google/gridworks/util/JSONUtilities.java +++ /dev/null @@ -1,155 +0,0 @@ -package com.google.gridworks.util; - -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Date; -import java.util.List; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONWriter; - -public class JSONUtilities { - static public String getString(JSONObject obj, String key, String def) { - try { - return obj.getString(key); - } catch (JSONException e) { - return def; - } - } - - static public int 
getInt(JSONObject obj, String key, int def) { - try { - return obj.getInt(key); - } catch (JSONException e) { - return def; - } - } - - static public boolean getBoolean(JSONObject obj, String key, boolean def) { - try { - return obj.getBoolean(key); - } catch (JSONException e) { - return def; - } - } - - static public double getDouble(JSONObject obj, String key, double def) { - try { - return obj.getDouble(key); - } catch (JSONException e) { - return def; - } - } - - static public long getLong(JSONObject obj, String key, long def) { - try { - return obj.getLong(key); - } catch (JSONException e) { - return def; - } - } - - static public Date getDate(JSONObject obj, String key, Date def) { - try { - Date d = ParsingUtilities.stringToDate(obj.getString(key)); - - return d != null ? d : def; - } catch (JSONException e) { - return def; - } - } - - static public int[] getIntArray(JSONObject obj, String key) { - try { - JSONArray a = obj.getJSONArray(key); - int[] r = new int[a.length()]; - - for (int i = 0; i < r.length; i++) { - r[i] = a.getInt(i); - } - - return r; - } catch (JSONException e) { - return new int[0]; - } - } - - static public String[] getStringArray(JSONObject obj, String key) { - try { - JSONArray a = obj.getJSONArray(key); - String[] r = new String[a.length()]; - - for (int i = 0; i < r.length; i++) { - r[i] = a.getString(i); - } - - return r; - } catch (JSONException e) { - return new String[0]; - } - } - - static public void getStringList(JSONObject obj, String key, List list) { - try { - JSONArray a = obj.getJSONArray(key); - int count = a.length(); - - for (int i = 0; i < count; i++) { - list.add(a.getString(i)); - } - } catch (JSONException e) { - } - } - - static public void writeStringList(JSONWriter writer, List list) throws JSONException { - writer.array(); - for (String s : list) { - writer.value(s); - } - writer.endArray(); - } - - static public void putField(JSONObject obj, String key, Object value) throws JSONException { - if (value 
instanceof Integer) { - obj.put(key, ((Integer) value).intValue()); - } else if (value instanceof Long) { - obj.put(key, ((Long) value).intValue()); - } else if (value instanceof Number) { - obj.put(key, ((Double) value).doubleValue()); - } else if (value instanceof Boolean) { - obj.put(key, (Boolean) value); - } else if (value instanceof Date) { - obj.put(key, ParsingUtilities.dateToString((Date) value)); - } else if (value instanceof Calendar) { - obj.put(key, ParsingUtilities.dateToString(((Calendar) value).getTime())); - } else if (value instanceof String) { - obj.put(key, (String) value); - } else { - obj.put(key, value.toString()); - } - } - - static public Object[] toArray(JSONArray a) throws JSONException { - int l = a.length(); - - Object[] a2 = new Object[l]; - for (int i = 0; i < l; i++) { - a2[i] = a.get(i); - } - - return a2; - } - - static public List toStringList(JSONArray a) throws JSONException { - int l = a.length(); - - List list = new ArrayList(); - for (int i = 0; i < l; i++) { - list.add(a.getString(i)); - } - - return list; - } -} diff --git a/main/src/com/google/gridworks/util/JSObject.java b/main/src/com/google/gridworks/util/JSObject.java deleted file mode 100644 index 3b1930de2..000000000 --- a/main/src/com/google/gridworks/util/JSObject.java +++ /dev/null @@ -1,130 +0,0 @@ -package com.google.gridworks.util; - -import java.io.IOException; -import java.util.Collection; -import java.util.Enumeration; -import java.util.Iterator; -import java.util.Properties; - -import org.apache.commons.lang.StringEscapeUtils; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -/** - * A utility class for encapsulating a Javascript object that can - * then be pretty-printed out through an IndentWriter. 
- * - * @author dfhuynh - */ -public class JSObject extends Properties { - private static final long serialVersionUID = 5864375136126385719L; - - static public void writeJSObject(IndentWriter writer, JSObject jso) throws IOException, JSONException { - writer.println("{"); - writer.indent(); - { - Enumeration e = jso.propertyNames(); - while (e.hasMoreElements()) { - String name = (String) e.nextElement(); - Object value = jso.get(name); - - writer.print("'"); - writer.print(name + "' : "); - writeObject(writer, value); - - if (e.hasMoreElements()) { - writer.println(","); - } else { - writer.println(); - } - } - } - writer.unindent(); - writer.print("}"); - } - - static public void writeCollection(IndentWriter writer, Collection c) throws IOException, JSONException { - writer.println("["); - writer.indent(); - { - Iterator i = c.iterator(); - while (i.hasNext()) { - writeObject(writer, i.next()); - if (i.hasNext()) { - writer.println(","); - } else { - writer.println(); - } - } - } - writer.unindent(); - writer.print("]"); - } - - static public void writeJSONObject(IndentWriter writer, JSONObject no) throws IOException, JSONException { - writer.println("{"); - writer.indent(); - { - String[] names = JSONObject.getNames(no); - for (int i = 0; i < names.length; i++) { - String name = names[i]; - Object value = no.get(name); - - writer.print("'"); - writer.print(name + "' : "); - writeObject(writer, value); - - if (i < names.length - 1) { - writer.println(","); - } else { - writer.println(); - } - } - } - writer.unindent(); - writer.print("}"); - } - - static public void writeJSONArray(IndentWriter writer, JSONArray na) throws IOException, JSONException { - writer.println("["); - writer.indent(); - { - int count = na.length(); - for (int i = 0; i < count; i++) { - Object element = na.get(i); - - writeObject(writer, element); - if (i < count - 1) { - writer.println(","); - } else { - writer.println(); - } - } - } - writer.unindent(); - writer.print("]"); - } - - static 
public void writeObject(IndentWriter writer, Object o) throws IOException, JSONException { - if (o == null) { - writer.print("null"); - } else if (o instanceof Boolean) { - writer.print(((Boolean) o).booleanValue() ? "true" : "false"); - } else if (o instanceof Number) { - writer.print(((Number) o).toString()); - - } else if (o instanceof Collection) { - writeCollection(writer, (Collection) o); - } else if (o instanceof JSONArray) { - writeJSONArray(writer, (JSONArray) o); - } else if (o instanceof JSObject) { - writeJSObject(writer, (JSObject) o); - } else if (o instanceof JSONObject) { - writeJSONObject(writer, (JSONObject) o); - - } else { - writer.print("\"" + StringEscapeUtils.escapeJavaScript(o.toString()) + "\""); - } - } -} diff --git a/main/src/com/google/gridworks/util/ParsingUtilities.java b/main/src/com/google/gridworks/util/ParsingUtilities.java deleted file mode 100644 index 6a779d457..000000000 --- a/main/src/com/google/gridworks/util/ParsingUtilities.java +++ /dev/null @@ -1,128 +0,0 @@ -package com.google.gridworks.util; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.io.UnsupportedEncodingException; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.Properties; - -import javax.servlet.http.HttpServletRequest; - -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.net.URLCodec; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONTokener; - -public class ParsingUtilities { - - static final public SimpleDateFormat s_sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); - - static public Properties parseUrlParameters(HttpServletRequest request) { - Properties options = new Properties(); - - String query = request.getQueryString(); - if (query != null) { - if (query.startsWith("?")) { - query = query.substring(1); - } - 
parseParameters(options,query); - } - return options; - } - - static public Properties parseParameters(Properties p, String str) { - if (str != null) { - String[] pairs = str.split("&"); - for (String pairString : pairs) { - int equal = pairString.indexOf('='); - String name = (equal >= 0) ? pairString.substring(0, equal) : ""; - String value = (equal >= 0) ? ParsingUtilities.decode(pairString.substring(equal + 1)) : ""; - p.put(name, value); - } - } - return p; - } - - static public Properties parseParameters(String str) { - return (str == null) ? null : parseParameters(new Properties(),str); - } - - static public String inputStreamToString(InputStream is) throws IOException { - Reader reader = new InputStreamReader(is, "UTF-8"); - try { - return readerToString(reader); - } finally { - reader.close(); - } - } - - static public String readerToString(Reader reader) throws IOException { - StringBuffer sb = new StringBuffer(); - - char[] chars = new char[8192]; - int c; - - while ((c = reader.read(chars)) > 0) { - sb.insert(sb.length(), chars, 0, c); - } - - return sb.toString(); - } - - static public JSONObject evaluateJsonStringToObject(String s) throws JSONException { - if( s == null ) throw new IllegalArgumentException("parameter 's' should not be null"); - JSONTokener t = new JSONTokener(s); - Object o = t.nextValue(); - if (o instanceof JSONObject) { - return (JSONObject) o; - } else { - throw new JSONException(s + " couldn't be parsed as JSON object"); - } - } - - static public JSONArray evaluateJsonStringToArray(String s) throws JSONException { - JSONTokener t = new JSONTokener(s); - Object o = t.nextValue(); - if (o instanceof JSONArray) { - return (JSONArray) o; - } else { - throw new JSONException(s + " couldn't be parsed as JSON array"); - } - } - - private static final URLCodec codec = new URLCodec(); - static public String encode(String s) { - try { - return codec.encode(s, "UTF-8"); - } catch (UnsupportedEncodingException e) { - return s; // should not 
happen - } - } - static public String decode(String s) { - try { - return codec.decode(s, "UTF-8"); - } catch (UnsupportedEncodingException e) { - return s; // should not happen - } catch (DecoderException e) { - return s; // should not happen - } - } - - static public String dateToString(Date d) { - return s_sdf.format(d); - } - - static public Date stringToDate(String s) { - try { - return s_sdf.parse(s); - } catch (ParseException e) { - return null; - } - } -} diff --git a/main/src/com/google/gridworks/util/Pool.java b/main/src/com/google/gridworks/util/Pool.java deleted file mode 100644 index 780a26d99..000000000 --- a/main/src/com/google/gridworks/util/Pool.java +++ /dev/null @@ -1,162 +0,0 @@ -package com.google.gridworks.util; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.Reader; -import java.io.Writer; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; -import java.util.Map.Entry; - -import org.json.JSONException; -import org.json.JSONWriter; - -import com.google.gridworks.GridworksServlet; -import com.google.gridworks.Jsonizable; -import com.google.gridworks.model.Recon; -import com.google.gridworks.model.ReconCandidate; - -public class Pool implements Jsonizable { - final protected Map candidates = new HashMap(); - final protected Map recons = new HashMap(); - - public void pool(ReconCandidate candidate) { - candidates.put(candidate.id, candidate); - } - - public void pool(Recon recon) { - recons.put(Long.toString(recon.id), recon); - poolReconCandidates(recon); - } - - public void poolReconCandidates(Recon recon) { - if (recon.match != null) { - pool(recon.match); - } - if (recon.candidates != null) { - for (ReconCandidate candidate : recon.candidates) { - pool(candidate); - } - } - } - - public Recon getRecon(String id) { - return recons.get(id); - } - - public ReconCandidate 
getReconCandidate(String topicID) { - return candidates.get(topicID); - } - - public void save(OutputStream out) throws IOException { - Writer writer = new OutputStreamWriter(out); - try { - save(writer); - } finally { - writer.flush(); - } - } - - public void save(Writer writer) throws IOException { - writer.write(GridworksServlet.getVersion()); writer.write('\n'); - - Properties options = new Properties(); - options.setProperty("mode", "save"); - options.put("pool", this); - - Collection candidates2 = candidates.values(); - writer.write("reconCandidateCount=" + candidates2.size()); writer.write('\n'); - - for (ReconCandidate c : candidates2) { - JSONWriter jsonWriter = new JSONWriter(writer); - try { - c.write(jsonWriter, options); - - writer.write('\n'); - } catch (JSONException e) { - e.printStackTrace(); - } - } - - Collection recons2 = recons.values(); - writer.write("reconCount=" + recons2.size()); writer.write('\n'); - - for (Recon recon : recons2) { - JSONWriter jsonWriter = new JSONWriter(writer); - try { - recon.write(jsonWriter, options); - - writer.write('\n'); - } catch (JSONException e) { - e.printStackTrace(); - } - } - } - - public void load(Reader reader) throws Exception { - LineNumberReader reader2 = new LineNumberReader(reader); - - /* String version = */ reader2.readLine(); - - String line; - while ((line = reader2.readLine()) != null) { - int equal = line.indexOf('='); - CharSequence field = line.subSequence(0, equal); - String value = line.substring(equal + 1); - - if ("reconCandidateCount".equals(field)) { - int count = Integer.parseInt(value); - - for (int i = 0; i < count; i++) { - line = reader2.readLine(); - if (line != null) { - ReconCandidate candidate = ReconCandidate.loadStreaming(line); - if (candidate != null) { - pool(candidate); - } - } - } - } else if ("reconCount".equals(field)) { - int count = Integer.parseInt(value); - - for (int i = 0; i < count; i++) { - line = reader2.readLine(); - if (line != null) { - Recon recon = 
Recon.loadStreaming(line, this); - if (recon != null) { - pool(recon); - } - } - } - } - } - } - - public void write(JSONWriter writer, Properties options) - throws JSONException { - - writer.object(); - - writer.key("reconCandidates"); - writer.object(); - for (Entry entry : candidates.entrySet()) { - writer.key(entry.getKey()); - entry.getValue().write(writer, options); - } - writer.endObject(); - - writer.key("recons"); - writer.object(); - for (Entry entry : recons.entrySet()) { - writer.key(entry.getKey().toString()); - entry.getValue().write(writer, options); - } - writer.endObject(); - - writer.endObject(); - } -} diff --git a/main/src/com/google/refine/ClientSideResourceManager.java b/main/src/com/google/refine/ClientSideResourceManager.java new file mode 100644 index 000000000..170bf937a --- /dev/null +++ b/main/src/com/google/refine/ClientSideResourceManager.java @@ -0,0 +1,91 @@ +package com.google.refine; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import edu.mit.simile.butterfly.ButterflyModule; +import edu.mit.simile.butterfly.MountPoint; + + +public class ClientSideResourceManager { + final static Logger logger = LoggerFactory.getLogger("gridworks_clientSideResourceManager"); + + static public class QualifiedPath { + public ButterflyModule module; + public String path; + public String fullPath; + } + static public class ClientSideResourceBundle { + final protected Set _pathSet = new HashSet(); + final protected List _pathList = new ArrayList(); + } + + final static protected Map s_bundles + = new HashMap(); + + static public void addPaths( + String bundleName, + ButterflyModule module, + String[] paths) { + + ClientSideResourceBundle bundle = s_bundles.get(bundleName); + if (bundle == null) { + bundle = new ClientSideResourceBundle(); + s_bundles.put(bundleName, bundle); + } + + 
for (String path : paths) { + String fullPath = resolve(module, path); + if (fullPath == null) { + logger.error("Failed to add paths to unmounted module " + module.getName()); + break; + } + if (!bundle._pathSet.contains(fullPath)) { + QualifiedPath qualifiedPath = new QualifiedPath(); + qualifiedPath.module = module; + qualifiedPath.path = path; + qualifiedPath.fullPath = fullPath; + + bundle._pathSet.add(fullPath); + bundle._pathList.add(qualifiedPath); + } + } + } + + static public QualifiedPath[] getPaths(String bundleName) { + ClientSideResourceBundle bundle = s_bundles.get(bundleName); + if (bundle == null) { + return new QualifiedPath[] {}; + } else { + QualifiedPath[] paths = new QualifiedPath[bundle._pathList.size()]; + bundle._pathList.toArray(paths); + return paths; + } + } + + static protected String resolve(ButterflyModule module, String path) { + MountPoint mountPoint = module.getMountPoint(); + if (mountPoint != null) { + String mountPointPath = mountPoint.getMountPoint(); + if (mountPointPath != null) { + StringBuffer sb = new StringBuffer(); + + boolean slashed = path.startsWith("/"); + char[] mountPointChars = mountPointPath.toCharArray(); + + sb.append(mountPointChars, 0, slashed ? 
mountPointChars.length - 1 : mountPointChars.length); + sb.append(path); + + return sb.toString(); + } + } + return null; + } +} diff --git a/main/src/com/google/refine/GridworksServlet.java b/main/src/com/google/refine/GridworksServlet.java new file mode 100644 index 000000000..c25543e8d --- /dev/null +++ b/main/src/com/google/refine/GridworksServlet.java @@ -0,0 +1,230 @@ +package com.google.refine; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Timer; +import java.util.TimerTask; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.commands.Command; +import com.google.refine.io.FileProjectManager; + +import edu.mit.simile.butterfly.Butterfly; +import edu.mit.simile.butterfly.ButterflyModule; + +public class GridworksServlet extends Butterfly { + + static private final String VERSION = "1.5"; + + private static final long serialVersionUID = 2386057901503517403L; + + private static final String JAVAX_SERVLET_CONTEXT_TEMPDIR = "javax.servlet.context.tempdir"; + + static private GridworksServlet s_singleton; + static private File s_dataDir; + + static final private Map commands = new HashMap(); + + // timer for periodically saving projects + static private Timer _timer; + + final static Logger logger = LoggerFactory.getLogger("gridworks"); + + public static String getVersion() { + return VERSION; + } + + final static protected long s_autoSavePeriod = 1000 * 60 * 5; // 5 minutes + + static protected class AutoSaveTimerTask extends TimerTask { + public void run() { + try { + ProjectManager.singleton.save(false); // quick, potentially incomplete save + } finally { + _timer.schedule(new AutoSaveTimerTask(), s_autoSavePeriod); + // we don't use scheduleAtFixedRate because that might result in 
+ // bunched up events when the computer is put in sleep mode + } + } + } + + protected ServletConfig config; + + @Override + public void init() throws ServletException { + super.init(); + + s_singleton = this; + + logger.trace("> initialize"); + + String data = getInitParameter("gridworks.data"); + + if (data == null) { + throw new ServletException("can't find servlet init config 'gridworks.data', I have to give up initializing"); + } + + s_dataDir = new File(data); + FileProjectManager.initialize(s_dataDir); + + if (_timer == null) { + _timer = new Timer("autosave"); + _timer.schedule(new AutoSaveTimerTask(), s_autoSavePeriod); + } + + logger.trace("< initialize"); + } + + @Override + public void destroy() { + logger.trace("> destroy"); + + // cancel automatic periodic saving and force a complete save. + if (_timer != null) { + _timer.cancel(); + _timer = null; + } + if (ProjectManager.singleton != null) { + ProjectManager.singleton.dispose(); + ProjectManager.singleton = null; + } + + this.config = null; + + logger.trace("< destroy"); + + super.destroy(); + } + + @Override + public void service(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + if (request.getPathInfo().startsWith("/command/")) { + String commandKey = getCommandKey(request); + Command command = commands.get(commandKey); + if (command != null) { + if (request.getMethod().equals("GET")) { + logger.trace("> GET {}", commandKey); + command.doGet(request, response); + logger.trace("< GET {}", commandKey); + } else if (request.getMethod().equals("POST")) { + logger.trace("> POST {}", commandKey); + command.doPost(request, response); + logger.trace("< POST {}", commandKey); + } else { + response.sendError(405); + } + } else { + response.sendError(404); + } + } else { + super.service(request, response); + } + } + + protected String getCommandKey(HttpServletRequest request) { + // A command path has this format: /command/module-name/command-name/... 
+ + String path = request.getPathInfo().substring("/command/".length()); + + int slash1 = path.indexOf('/'); + if (slash1 >= 0) { + int slash2 = path.indexOf('/', slash1 + 1); + if (slash2 > 0) { + path = path.substring(0, slash2); + } + } + + return path; + } + + private File tempDir = null; + + public File getTempDir() { + if (tempDir == null) { + File tempDir = (File) this.config.getServletContext().getAttribute(JAVAX_SERVLET_CONTEXT_TEMPDIR); + if (tempDir == null) { + throw new RuntimeException("This app server doesn't support temp directories"); + } + } + return tempDir; + } + + public File getTempFile(String name) { + return new File(getTempDir(), name); + } + + public File getCacheDir(String name) { + File dir = new File(new File(s_dataDir, "cache"), name); + dir.mkdirs(); + + return dir; + } + + public String getConfiguration(String name, String def) { + return null; + } + + /** + * Register a single command. + * + * @param module the module the command belongs to + * @param name command verb for command + * @param commandObject object implementing the command + * @return true if command was loaded and registered successfully + */ + protected boolean registerOneCommand(ButterflyModule module, String name, Command commandObject) { + return registerOneCommand(module.getName() + "/" + name, commandObject); + } + + /** + * Register a single command. + * + * @param path path for command + * @param commandObject object implementing the command + * @return true if command was loaded and registered successfully + */ + protected boolean registerOneCommand(String path, Command commandObject) { + if (commands.containsKey(path)) { + return false; + } + + commandObject.init(this); + commands.put(path, commandObject); + + return true; + } + + // Currently only for test purposes + protected boolean unregisterCommand(String verb) { + return commands.remove(verb) != null; + } + + /** + * Register a single command. Used by extensions. 
+ * + * @param module the module the command belongs to + * @param name command verb for command + * @param commandObject object implementing the command + * + * @return true if command was loaded and registered successfully + */ + static public boolean registerCommand(ButterflyModule module, String commandName, Command commandObject) { + return s_singleton.registerOneCommand(module, commandName, commandObject); + } + + static public Class getClass(String className) throws ClassNotFoundException { + if (className.startsWith("com.metaweb.")) { + className = "com.google." + className.substring("com.metaweb.".length()); + } + return Class.forName(className); + } +} \ No newline at end of file diff --git a/main/src/com/google/refine/InterProjectModel.java b/main/src/com/google/refine/InterProjectModel.java new file mode 100644 index 000000000..9258984d8 --- /dev/null +++ b/main/src/com/google/refine/InterProjectModel.java @@ -0,0 +1,128 @@ +package com.google.refine; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.expr.HasFieldsListImpl; +import com.google.refine.expr.WrappedRow; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class InterProjectModel { + static public class ProjectJoin { + final public long fromProjectID; + final public String fromProjectColumnName; + final public long toProjectID; + final public String toProjectColumnName; + + final public Map> valueToRowIndices = + new HashMap>(); + + ProjectJoin( + long fromProjectID, + String fromProjectColumnName, + long toProjectID, + String toProjectColumnName + ) { + this.fromProjectID = fromProjectID; + this.fromProjectColumnName = fromProjectColumnName; + this.toProjectID = toProjectID; + this.toProjectColumnName = toProjectColumnName; + } + + public HasFieldsListImpl 
getRows(Object value) { + if (ExpressionUtils.isNonBlankData(value) && valueToRowIndices.containsKey(value)) { + Project toProject = ProjectManager.singleton.getProject(toProjectID); + if (toProject != null) { + HasFieldsListImpl rows = new HasFieldsListImpl(); + for (Integer r : valueToRowIndices.get(value)) { + Row row = toProject.rows.get(r); + rows.add(new WrappedRow(toProject, r, row)); + } + + return rows; + } + } + return null; + } + } + + protected Map _joins = new HashMap(); + + public ProjectJoin getJoin(String fromProject, String fromColumn, String toProject, String toColumn) { + String key = fromProject + ";" + fromColumn + ";" + toProject + ";" + toColumn; + if (!_joins.containsKey(key)) { + ProjectJoin join = new ProjectJoin( + ProjectManager.singleton.getProjectID(fromProject), + fromColumn, + ProjectManager.singleton.getProjectID(toProject), + toColumn + ); + + computeJoin(join); + + _joins.put(key, join); + } + + return _joins.get(key); + } + + public void flushJoinsInvolvingProject(long projectID) { + for (Entry entry : _joins.entrySet()) { + ProjectJoin join = entry.getValue(); + if (join.fromProjectID == projectID || join.toProjectID == projectID) { + _joins.remove(entry.getKey()); + } + } + } + + public void flushJoinsInvolvingProjectColumn(long projectID, String columnName) { + for (Entry entry : _joins.entrySet()) { + ProjectJoin join = entry.getValue(); + if (join.fromProjectID == projectID && join.fromProjectColumnName.equals(columnName) || + join.toProjectID == projectID && join.toProjectColumnName.equals(columnName)) { + _joins.remove(entry.getKey()); + } + } + } + + protected void computeJoin(ProjectJoin join) { + if (join.fromProjectID < 0 || join.toProjectID < 0) { + return; + } + + Project fromProject = ProjectManager.singleton.getProject(join.fromProjectID); + Project toProject = ProjectManager.singleton.getProject(join.toProjectID); + if (fromProject == null || toProject == null) { + return; + } + + Column fromColumn = 
fromProject.columnModel.getColumnByName(join.fromProjectColumnName); + Column toColumn = toProject.columnModel.getColumnByName(join.toProjectColumnName); + if (fromColumn == null || toColumn == null) { + return; + } + + for (Row fromRow : fromProject.rows) { + Object value = fromRow.getCellValue(fromColumn.getCellIndex()); + if (ExpressionUtils.isNonBlankData(value) && !join.valueToRowIndices.containsKey(value)) { + join.valueToRowIndices.put(value, new ArrayList()); + } + } + + int count = toProject.rows.size(); + for (int r = 0; r < count; r++) { + Row toRow = toProject.rows.get(r); + + Object value = toRow.getCellValue(toColumn.getCellIndex()); + if (ExpressionUtils.isNonBlankData(value) && join.valueToRowIndices.containsKey(value)) { + join.valueToRowIndices.get(value).add(r); + } + } + } +} diff --git a/main/src/com/google/refine/Jsonizable.java b/main/src/com/google/refine/Jsonizable.java new file mode 100644 index 000000000..ffa60b61d --- /dev/null +++ b/main/src/com/google/refine/Jsonizable.java @@ -0,0 +1,16 @@ +package com.google.refine; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +/** + * Interface for streaming out JSON, either into HTTP responses or + * serialization files. 
+ * + * @author dfhuynh + */ +public interface Jsonizable { + public void write(JSONWriter writer, Properties options) throws JSONException; +} diff --git a/main/src/com/google/refine/ProjectManager.java b/main/src/com/google/refine/ProjectManager.java new file mode 100644 index 000000000..c8616c7c7 --- /dev/null +++ b/main/src/com/google/refine/ProjectManager.java @@ -0,0 +1,434 @@ +package com.google.refine; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.tools.tar.TarOutputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.history.HistoryEntryManager; +import com.google.refine.model.Project; +import com.google.refine.preference.PreferenceStore; +import com.google.refine.preference.TopList; + +/** + * ProjectManager is responsible for loading and saving the workspace and projects. + * + * + */ +public abstract class ProjectManager { + // last n expressions used across all projects + static protected final int s_expressionHistoryMax = 100; + + protected Map _projectsMetadata; + protected PreferenceStore _preferenceStore; + + final static Logger logger = LoggerFactory.getLogger("project_manager"); + + /** + * What caches the joins between projects. + */ + transient protected InterProjectModel _interProjectModel = new InterProjectModel(); + + /** + * Flags + */ + transient protected int _busy = 0; // heavy operations like creating or importing projects are going on + + /** + * While each project's metadata is loaded completely at start-up, each project's raw data + * is loaded only when the project is accessed by the user. This is because project + * metadata is tiny compared to raw project data. 
This hash map from project ID to project + * is more like a last accessed-last out cache. + */ + transient protected Map _projects; + + static public ProjectManager singleton; + + protected ProjectManager(){ + _projectsMetadata = new HashMap(); + _preferenceStore = new PreferenceStore(); + _projects = new HashMap(); + + preparePreferenceStore(_preferenceStore); + } + + public void dispose() { + save(true); // complete save + + for (Project project : _projects.values()) { + if (project != null) { + project.dispose(); + } + } + + _projects.clear(); + _projectsMetadata.clear(); + } + + /** + * Registers the project in the memory of the current session + * @param project + * @param projectMetadata + */ + public void registerProject(Project project, ProjectMetadata projectMetadata) { + synchronized (this) { + _projects.put(project.id, project); + _projectsMetadata.put(project.id, projectMetadata); + } + } + //----------Load from data store to memory---------------- + + /** + * Load project metadata from data storage + * @param projectID + * @return + */ + public abstract boolean loadProjectMetadata(long projectID); + + /** + * Loads a project from the data store into memory + * @param id + * @return + */ + protected abstract Project loadProject(long id); + + //------------Import and Export from Gridworks archive----------------- + /** + * Import project from a Gridworks archive + * @param projectID + * @param inputStream + * @param gziped + * @throws IOException + */ + public abstract void importProject(long projectID, InputStream inputStream, boolean gziped) throws IOException; + + /** + * Export project to a Gridworks archive + * @param projectId + * @param tos + * @throws IOException + */ + public abstract void exportProject(long projectId, TarOutputStream tos) throws IOException; + + + //------------Save to record store------------ + /** + * Saves a project and its metadata to the data store + * @param id + */ + public void ensureProjectSaved(long id) { + 
synchronized(this){ + ProjectMetadata metadata = this.getProjectMetadata(id); + if (metadata != null) { + try { + saveMetadata(metadata, id); + } catch (Exception e) { + e.printStackTrace(); + } + }//FIXME what should be the behaviour if metadata is null? i.e. not found + + Project project = getProject(id); + if (project != null && metadata != null && metadata.getModified().after(project.getLastSave())) { + try { + saveProject(project); + } catch (Exception e) { + e.printStackTrace(); + } + }//FIXME what should be the behaviour if project is null? i.e. not found or loaded. + //FIXME what should happen if the metadata is found, but not the project? or vice versa? + } + + } + + /** + * Save project metadata to the data store + * @param metadata + * @param projectId + * @throws Exception + */ + protected abstract void saveMetadata(ProjectMetadata metadata, long projectId) throws Exception; + + /** + * Save project to the data store + * @param project + */ + protected abstract void saveProject(Project project); + + /** + * Save workspace and all projects to data store + * @param allModified + */ + public void save(boolean allModified) { + if (allModified || _busy == 0) { + saveProjects(allModified); + saveWorkspace(); + } + } + + /** + * Saves the workspace to the data store + */ + protected abstract void saveWorkspace(); + + /** + * A utility class to prioritize projects for saving, depending on how long ago + * they have been changed but have not been saved. 
+ */ + static protected class SaveRecord { + final Project project; + final long overdue; + + SaveRecord(Project project, long overdue) { + this.project = project; + this.overdue = overdue; + } + } + + static protected final int s_projectFlushDelay = 1000 * 60 * 60; // 1 hour + static protected final int s_quickSaveTimeout = 1000 * 30; // 30 secs + + /** + * Saves all projects to the data store + * @param allModified + */ + protected void saveProjects(boolean allModified) { + List records = new ArrayList(); + Date startTimeOfSave = new Date(); + + synchronized (this) { + for (long id : _projectsMetadata.keySet()) { + ProjectMetadata metadata = getProjectMetadata(id); + Project project = _projects.get(id); // don't call getProject() as that will load the project. + + if (project != null) { + boolean hasUnsavedChanges = + metadata.getModified().getTime() > project.getLastSave().getTime(); + + if (hasUnsavedChanges) { + long msecsOverdue = startTimeOfSave.getTime() - project.getLastSave().getTime(); + + records.add(new SaveRecord(project, msecsOverdue)); + + } else if (startTimeOfSave.getTime() - project.getLastSave().getTime() > s_projectFlushDelay) { + /* + * It's been a while since the project was last saved and it hasn't been + * modified. We can safely remove it from the cache to save some memory. + */ + _projects.remove(id).dispose(); + } + } + } + } + + if (records.size() > 0) { + Collections.sort(records, new Comparator() { + public int compare(SaveRecord o1, SaveRecord o2) { + if (o1.overdue < o2.overdue) { + return 1; + } else if (o1.overdue > o2.overdue) { + return -1; + } else { + return 0; + } + } + }); + + logger.info(allModified ? + "Saving all modified projects ..." : + "Saving some modified projects ..." 
+ ); + + for (int i = 0; + i < records.size() && + (allModified || (new Date().getTime() - startTimeOfSave.getTime() < s_quickSaveTimeout)); + i++) { + + try { + saveProject(records.get(i).project); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + } + + //--------------Get from memory-------------- + /** + * Gets the InterProjectModel from memory + */ + public InterProjectModel getInterProjectModel() { + return _interProjectModel; + } + + + /** + * Gets the project metadata from memory + * Requires that the metadata has already been loaded from the data store + * @param id + * @return + */ + public ProjectMetadata getProjectMetadata(long id) { + return _projectsMetadata.get(id); + } + + /** + * Gets the project metadata from memory + * Requires that the metadata has already been loaded from the data store + * @param name + * @return + */ + public ProjectMetadata getProjectMetadata(String name) { + for (ProjectMetadata pm : _projectsMetadata.values()) { + if (pm.getName().equals(name)) { + return pm; + } + } + return null; + } + + /** + * Tries to find the project id when given a project name + * Requires that all project metadata exists has been loaded to memory from the data store + * @param name + * The name of the project + * @return + * The id of the project, or -1 if it cannot be found + */ + public long getProjectID(String name) { + for (Entry entry : _projectsMetadata.entrySet()) { + if (entry.getValue().getName().equals(name)) { + return entry.getKey(); + } + } + return -1; + } + + + /** + * Gets all the project Metadata currently held in memory + * @return + */ + public Map getAllProjectMetadata() { + return _projectsMetadata; + } + + /** + * Gets the required project from the data store + * If project does not already exist in memory, it is loaded from the data store + * @param id + * the id of the project + * @return + * the project with the matching id, or null if it can't be found + */ + public Project getProject(long id) { + synchronized 
(this) { + if (_projects.containsKey(id)) { + return _projects.get(id); + } else { + Project project = loadProject(id); + + _projects.put(id, project); + + return project; + } + } + } + + /** + * Gets the preference store + * @return + */ + public PreferenceStore getPreferenceStore() { + return _preferenceStore; + } + + /** + * Gets all expressions from the preference store + * @return + */ + public List getExpressions() { + return ((TopList) _preferenceStore.get("scripting.expressions")).getList(); + } + + /** + * The history entry manager deals with changes + * @return manager for handling history + */ + public abstract HistoryEntryManager getHistoryEntryManager(); + + //-------------remove project----------- + + /** + * Remove the project from the data store + * @param project + */ + public void deleteProject(Project project) { + deleteProject(project.id); + } + + /** + * Remove project from data store + * @param projectID + */ + public abstract void deleteProject(long projectID); + + /** + * Removes project from memory + * @param projectID + */ + protected void removeProject(long projectID){ + if (_projects.containsKey(projectID)) { + _projects.remove(projectID).dispose(); + } + if (_projectsMetadata.containsKey(projectID)) { + _projectsMetadata.remove(projectID); + } + } + + //--------------Miscellaneous----------- + + /** + * Sets the flag for long running operations + * @param busy + */ + public void setBusy(boolean busy) { + synchronized (this) { + if (busy) { + _busy++; + } else { + _busy--; + } + } + } + + + + /** + * Add the latest expression to the preference store + * @param s + */ + public void addLatestExpression(String s) { + synchronized (this) { + ((TopList) _preferenceStore.get("scripting.expressions")).add(s); + } + } + + + /** + * + * @param ps + */ + static protected void preparePreferenceStore(PreferenceStore ps) { + ps.put("scripting.expressions", new TopList(s_expressionHistoryMax)); + } +} diff --git 
a/main/src/com/google/refine/ProjectMetadata.java b/main/src/com/google/refine/ProjectMetadata.java new file mode 100644 index 000000000..512e1d860 --- /dev/null +++ b/main/src/com/google/refine/ProjectMetadata.java @@ -0,0 +1,198 @@ +package com.google.refine; + +import java.io.Serializable; +import java.util.Date; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.preference.PreferenceStore; +import com.google.refine.preference.TopList; +import com.google.refine.util.JSONUtilities; +import com.google.refine.util.ParsingUtilities; + +public class ProjectMetadata implements Jsonizable { + private final Date _created; + private Date _modified; + private String _name; + private String _password; + + private String _encoding; + private int _encodingConfidence; + + private Map _customMetadata = new HashMap(); + private PreferenceStore _preferenceStore = new PreferenceStore(); + + final Logger logger = LoggerFactory.getLogger("project_metadata"); + + protected ProjectMetadata(Date date) { + _created = date; + preparePreferenceStore(_preferenceStore); + } + + public ProjectMetadata() { + _created = new Date(); + _modified = _created; + preparePreferenceStore(_preferenceStore); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("name"); writer.value(_name); + writer.key("created"); writer.value(ParsingUtilities.dateToString(_created)); + writer.key("modified"); writer.value(ParsingUtilities.dateToString(_modified)); + + if ("save".equals(options.getProperty("mode"))) { + writer.key("password"); writer.value(_password); + + writer.key("encoding"); writer.value(_encoding); + writer.key("encodingConfidence"); writer.value(_encodingConfidence); + + 
writer.key("customMetadata"); writer.object(); + for (String key : _customMetadata.keySet()) { + Serializable value = _customMetadata.get(key); + writer.key(key); + writer.value(value); + } + writer.endObject(); + + writer.key("preferences"); _preferenceStore.write(writer, options); + } + writer.endObject(); + } + + public void write(JSONWriter jsonWriter) throws Exception { + Properties options = new Properties(); + options.setProperty("mode", "save"); + + write(jsonWriter, options); + } + + static public ProjectMetadata loadFromJSON(JSONObject obj) { + ProjectMetadata pm = new ProjectMetadata(JSONUtilities.getDate(obj, "modified", new Date())); + + pm._modified = JSONUtilities.getDate(obj, "modified", new Date()); + pm._name = JSONUtilities.getString(obj, "name", ""); + pm._password = JSONUtilities.getString(obj, "password", ""); + + pm._encoding = JSONUtilities.getString(obj, "encoding", ""); + pm._encodingConfidence = JSONUtilities.getInt(obj, "encodingConfidence", 0); + + if (obj.has("preferences") && !obj.isNull("preferences")) { + try { + pm._preferenceStore.load(obj.getJSONObject("preferences")); + } catch (JSONException e) { + // ignore + } + } + + if (obj.has("expressions") && !obj.isNull("expressions")) { // backward compatibility + try { + ((TopList) pm._preferenceStore.get("scripting.expressions")) + .load(obj.getJSONArray("expressions")); + } catch (JSONException e) { + // ignore + } + } + + if (obj.has("customMetadata") && !obj.isNull("customMetadata")) { + try { + JSONObject obj2 = obj.getJSONObject("customMetadata"); + + @SuppressWarnings("unchecked") + Iterator keys = obj2.keys(); + while (keys.hasNext()) { + String key = keys.next(); + Object value = obj2.get(key); + if (value != null && value instanceof Serializable) { + pm._customMetadata.put(key, (Serializable) value); + } + } + } catch (JSONException e) { + // ignore + } + } + + return pm; + } + + static protected void preparePreferenceStore(PreferenceStore ps) { + 
ProjectManager.preparePreferenceStore(ps); + // Any project specific preferences? + } + + public Date getCreated() { + return _created; + } + + public void setName(String name) { + this._name = name; + } + + public String getName() { + return _name; + } + + public void setEncoding(String encoding) { + this._encoding = encoding; + } + + public String getEncoding() { + return _encoding; + } + + public void setEncodingConfidence(int confidence) { + this._encodingConfidence = confidence; + } + + public void setEncodingConfidence(String confidence) { + if (confidence != null) { + this.setEncodingConfidence(Integer.parseInt(confidence)); + } + } + + public int getEncodingConfidence() { + return _encodingConfidence; + } + + public void setPassword(String password) { + this._password = password; + } + + public String getPassword() { + return _password; + } + + public Date getModified() { + return _modified; + } + + public void updateModified() { + _modified = new Date(); + } + + public PreferenceStore getPreferenceStore() { + return _preferenceStore; + } + + public Serializable getCustomMetadata(String key) { + return _customMetadata.get(key); + } + + public void setCustomMetadata(String key, Serializable value) { + if (value == null) { + _customMetadata.remove(key); + } else { + _customMetadata.put(key, value); + } + } +} diff --git a/main/src/com/google/refine/browsing/DecoratedValue.java b/main/src/com/google/refine/browsing/DecoratedValue.java new file mode 100644 index 000000000..21d3d790e --- /dev/null +++ b/main/src/com/google/refine/browsing/DecoratedValue.java @@ -0,0 +1,33 @@ +package com.google.refine.browsing; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; + +/** + * Store a value and its text label, in case the value is not a string itself. + * For instance, if a value is a date, then its label can be one particular + * rendering of that date. 
+ * + * Facet choices that are presented to the user as text are stored as decorated values. + */ +public class DecoratedValue implements Jsonizable { + final public Object value; + final public String label; + + public DecoratedValue(Object value, String label) { + this.value = value; + this.label = label; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + writer.object(); + writer.key("v"); writer.value(value); + writer.key("l"); writer.value(label); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/browsing/Engine.java b/main/src/com/google/refine/browsing/Engine.java new file mode 100644 index 000000000..7481d3da4 --- /dev/null +++ b/main/src/com/google/refine/browsing/Engine.java @@ -0,0 +1,214 @@ +package com.google.refine.browsing; + +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.browsing.facets.Facet; +import com.google.refine.browsing.facets.ListFacet; +import com.google.refine.browsing.facets.RangeFacet; +import com.google.refine.browsing.facets.ScatterplotFacet; +import com.google.refine.browsing.facets.TextSearchFacet; +import com.google.refine.browsing.facets.TimeRangeFacet; +import com.google.refine.browsing.util.ConjunctiveFilteredRecords; +import com.google.refine.browsing.util.ConjunctiveFilteredRows; +import com.google.refine.browsing.util.FilteredRecordsAsFilteredRows; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +/** + * Faceted browsing engine. 
+ */ +public class Engine implements Jsonizable { + static public enum Mode { + RowBased, + RecordBased + } + + public final static String INCLUDE_DEPENDENT = "includeDependent"; + public final static String MODE = "mode"; + public final static String MODE_ROW_BASED = "row-based"; + public final static String MODE_RECORD_BASED = "record-based"; + + protected Project _project; + protected List _facets = new LinkedList(); + protected Mode _mode = Mode.RowBased; + + static public String modeToString(Mode mode) { + return mode == Mode.RowBased ? MODE_ROW_BASED : MODE_RECORD_BASED; + } + static public Mode stringToMode(String s) { + return MODE_ROW_BASED.equals(s) ? Mode.RowBased : Mode.RecordBased; + } + + public Engine(Project project) { + _project = project; + } + + public Mode getMode() { + return _mode; + } + public void setMode(Mode mode) { + _mode = mode; + } + + public FilteredRows getAllRows() { + return new FilteredRows() { + @Override + public void accept(Project project, RowVisitor visitor) { + try { + visitor.start(project); + + int c = project.rows.size(); + for (int rowIndex = 0; rowIndex < c; rowIndex++) { + Row row = project.rows.get(rowIndex); + visitor.visit(project, rowIndex, row); + } + } finally { + visitor.end(project); + } + } + }; + } + + public FilteredRows getAllFilteredRows() { + return getFilteredRows(null); + } + + public FilteredRows getFilteredRows(Facet except) { + if (_mode == Mode.RecordBased) { + return new FilteredRecordsAsFilteredRows(getFilteredRecords(except)); + } else if (_mode == Mode.RowBased) { + ConjunctiveFilteredRows cfr = new ConjunctiveFilteredRows(); + for (Facet facet : _facets) { + if (facet != except) { + RowFilter rowFilter = facet.getRowFilter(_project); + if (rowFilter != null) { + cfr.add(rowFilter); + } + } + } + return cfr; + } + throw new InternalError("Unknown mode."); + } + + public FilteredRecords getAllRecords() { + return new FilteredRecords() { + @Override + public void accept(Project project, 
RecordVisitor visitor) { + try { + visitor.start(project); + + int c = project.recordModel.getRecordCount(); + for (int r = 0; r < c; r++) { + visitor.visit(project, project.recordModel.getRecord(r)); + } + } finally { + visitor.end(project); + } + } + }; + } + + public FilteredRecords getFilteredRecords() { + return getFilteredRecords(null); + } + + public FilteredRecords getFilteredRecords(Facet except) { + if (_mode == Mode.RecordBased) { + ConjunctiveFilteredRecords cfr = new ConjunctiveFilteredRecords(); + for (Facet facet : _facets) { + if (facet != except) { + RecordFilter recordFilter = facet.getRecordFilter(_project); + if (recordFilter != null) { + cfr.add(recordFilter); + } + } + } + return cfr; + } + throw new InternalError("This method should not be called when the engine is not in record mode."); + } + + public void initializeFromJSON(JSONObject o) throws Exception { + if (o == null) { + return; + } + + if (o.has("facets") && !o.isNull("facets")) { + JSONArray a = o.getJSONArray("facets"); + int length = a.length(); + + for (int i = 0; i < length; i++) { + JSONObject fo = a.getJSONObject(i); + String type = fo.has("type") ? fo.getString("type") : "list"; + + Facet facet = null; + if ("list".equals(type)) { + facet = new ListFacet(); + } else if ("range".equals(type)) { + facet = new RangeFacet(); + } else if ("timerange".equals(type)) { + facet = new TimeRangeFacet(); + } else if ("scatterplot".equals(type)) { + facet = new ScatterplotFacet(); + } else if ("text".equals(type)) { + facet = new TextSearchFacet(); + } + + if (facet != null) { + facet.initializeFromJSON(_project, fo); + _facets.add(facet); + } + } + } + + // for backward compatibility + if (o.has(INCLUDE_DEPENDENT) && !o.isNull(INCLUDE_DEPENDENT)) { + _mode = o.getBoolean(INCLUDE_DEPENDENT) ? Mode.RecordBased : Mode.RowBased; + } + + if (o.has(MODE) && !o.isNull(MODE)) { + _mode = MODE_ROW_BASED.equals(o.getString(MODE)) ? 
Mode.RowBased : Mode.RecordBased; + } + } + + public void computeFacets() throws JSONException { + if (_mode == Mode.RowBased) { + for (Facet facet : _facets) { + FilteredRows filteredRows = getFilteredRows(facet); + + facet.computeChoices(_project, filteredRows); + } + } else if (_mode == Mode.RecordBased) { + for (Facet facet : _facets) { + FilteredRecords filteredRecords = getFilteredRecords(facet); + + facet.computeChoices(_project, filteredRecords); + } + } else { + throw new InternalError("Unknown mode."); + } + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("facets"); + writer.array(); + for (Facet facet : _facets) { + facet.write(writer, options); + } + writer.endArray(); + writer.key(MODE); writer.value(_mode == Mode.RowBased ? MODE_ROW_BASED : MODE_RECORD_BASED); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/browsing/FilteredRecords.java b/main/src/com/google/refine/browsing/FilteredRecords.java new file mode 100644 index 000000000..54ce672cd --- /dev/null +++ b/main/src/com/google/refine/browsing/FilteredRecords.java @@ -0,0 +1,18 @@ +package com.google.refine.browsing; + +import com.google.refine.model.Project; + +/** + * Interface for anything that can decide which records match and which don't + * based on some particular criteria. 
+ */ +public interface FilteredRecords { + /** + * Go through the records of the given project, determine which match and which don't, + * and call visitor.visit() on those that match + * + * @param project + * @param visitor + */ + public void accept(Project project, RecordVisitor visitor); +} diff --git a/main/src/com/google/refine/browsing/FilteredRows.java b/main/src/com/google/refine/browsing/FilteredRows.java new file mode 100644 index 000000000..0ddbd5388 --- /dev/null +++ b/main/src/com/google/refine/browsing/FilteredRows.java @@ -0,0 +1,19 @@ +package com.google.refine.browsing; + +import com.google.refine.model.Project; + +/** + * Interface for anything that can decide which rows match and which rows don't match + * based on some particular criteria. + */ +public interface FilteredRows { + /** + * Go through the rows of the given project, determine which match and which don't, + * and call visitor.visit() on those that match, and possibly their context and + * dependent rows. + * + * @param project + * @param visitor + */ + public void accept(Project project, RowVisitor visitor); +} diff --git a/main/src/com/google/refine/browsing/RecordFilter.java b/main/src/com/google/refine/browsing/RecordFilter.java new file mode 100644 index 000000000..269381538 --- /dev/null +++ b/main/src/com/google/refine/browsing/RecordFilter.java @@ -0,0 +1,12 @@ +package com.google.refine.browsing; + +import com.google.refine.model.Project; +import com.google.refine.model.Record; + +/** + * Interface for judging if a particular record matches or doesn't match some + * particular criterion, such as a facet constraint. 
+ */ +public interface RecordFilter { + public boolean filterRecord(Project project, Record record); +} diff --git a/main/src/com/google/refine/browsing/RecordVisitor.java b/main/src/com/google/refine/browsing/RecordVisitor.java new file mode 100644 index 000000000..e3a11696a --- /dev/null +++ b/main/src/com/google/refine/browsing/RecordVisitor.java @@ -0,0 +1,19 @@ +package com.google.refine.browsing; + +import com.google.refine.model.Project; +import com.google.refine.model.Record; + +/** + * Interface for visiting records one by one. The records visited are only those that match some + * particular criteria, such as facets' constraints. + */ +public interface RecordVisitor { + public void start(Project project); // called before any visit() call + + public boolean visit( + Project project, + Record record + ); + + public void end(Project project); // called after all visit() calls +} diff --git a/main/src/com/google/refine/browsing/RowFilter.java b/main/src/com/google/refine/browsing/RowFilter.java new file mode 100644 index 000000000..19e3816d8 --- /dev/null +++ b/main/src/com/google/refine/browsing/RowFilter.java @@ -0,0 +1,12 @@ +package com.google.refine.browsing; + +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +/** + * Interface for judging if a particular row matches or doesn't match some + * particular criterion, such as a facet constraint. + */ +public interface RowFilter { + public boolean filterRow(Project project, int rowIndex, Row row); +} diff --git a/main/src/com/google/refine/browsing/RowVisitor.java b/main/src/com/google/refine/browsing/RowVisitor.java new file mode 100644 index 000000000..1358800fd --- /dev/null +++ b/main/src/com/google/refine/browsing/RowVisitor.java @@ -0,0 +1,20 @@ +package com.google.refine.browsing; + +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +/** + * Interface for visiting rows one by one. 
The rows visited are only those that match some + * particular criteria, such as facets' constraints. + */ +public interface RowVisitor { + public void start(Project project); // called before any visit() call + + public boolean visit( + Project project, + int rowIndex, // zero-based row index + Row row + ); + + public void end(Project project); // called after all visit() calls +} diff --git a/main/src/com/google/refine/browsing/facets/Facet.java b/main/src/com/google/refine/browsing/facets/Facet.java new file mode 100644 index 000000000..d052592d0 --- /dev/null +++ b/main/src/com/google/refine/browsing/facets/Facet.java @@ -0,0 +1,25 @@ +package com.google.refine.browsing.facets; + +import org.json.JSONObject; + +import com.google.refine.Jsonizable; +import com.google.refine.browsing.FilteredRecords; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RecordFilter; +import com.google.refine.browsing.RowFilter; +import com.google.refine.model.Project; + +/** + * Interface of facets. 
+ */ +public interface Facet extends Jsonizable { + public RowFilter getRowFilter(Project project); + + public RecordFilter getRecordFilter(Project project); + + public void computeChoices(Project project, FilteredRows filteredRows); + + public void computeChoices(Project project, FilteredRecords filteredRecords); + + public void initializeFromJSON(Project project, JSONObject o) throws Exception; +} diff --git a/main/src/com/google/refine/browsing/facets/ListFacet.java b/main/src/com/google/refine/browsing/facets/ListFacet.java new file mode 100644 index 000000000..a9fdc8672 --- /dev/null +++ b/main/src/com/google/refine/browsing/facets/ListFacet.java @@ -0,0 +1,255 @@ +package com.google.refine.browsing.facets; + +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.ProjectManager; +import com.google.refine.browsing.DecoratedValue; +import com.google.refine.browsing.FilteredRecords; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RecordFilter; +import com.google.refine.browsing.RowFilter; +import com.google.refine.browsing.filters.AllRowsRecordFilter; +import com.google.refine.browsing.filters.AnyRowRecordFilter; +import com.google.refine.browsing.filters.ExpressionEqualRowFilter; +import com.google.refine.browsing.util.ExpressionNominalValueGrouper; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.ParsingException; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.util.JSONUtilities; + +public class ListFacet implements Facet { + /* + * Configuration + */ + protected String _name; + protected String _expression; + protected String _columnName; + protected boolean _invert; + + // If true, then facet won't show the blank and error 
choices + protected boolean _omitBlank; + protected boolean _omitError; + + protected List _selection = new LinkedList(); + protected boolean _selectBlank; + protected boolean _selectError; + + /* + * Derived configuration + */ + protected int _cellIndex; + protected Evaluable _eval; + protected String _errorMessage; + + /* + * Computed results + */ + protected List _choices = new LinkedList(); + protected int _blankCount; + protected int _errorCount; + + public ListFacet() { + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("name"); writer.value(_name); + writer.key("expression"); writer.value(_expression); + writer.key("columnName"); writer.value(_columnName); + writer.key("invert"); writer.value(_invert); + + if (_errorMessage != null) { + writer.key("error"); writer.value(_errorMessage); + } else if (_choices.size() > getLimit()) { + writer.key("error"); writer.value("Too many choices"); + } else { + writer.key("choices"); writer.array(); + for (NominalFacetChoice choice : _choices) { + choice.write(writer, options); + } + writer.endArray(); + + if (!_omitBlank && (_selectBlank || _blankCount > 0)) { + writer.key("blankChoice"); + writer.object(); + writer.key("s"); writer.value(_selectBlank); + writer.key("c"); writer.value(_blankCount); + writer.endObject(); + } + if (!_omitError && (_selectError || _errorCount > 0)) { + writer.key("errorChoice"); + writer.object(); + writer.key("s"); writer.value(_selectError); + writer.key("c"); writer.value(_errorCount); + writer.endObject(); + } + } + + writer.endObject(); + } + + protected int getLimit() { + Object v = ProjectManager.singleton.getPreferenceStore().get("ui.browsing.listFacet.limit"); + if (v != null) { + if (v instanceof Number) { + return ((Number) v).intValue(); + } else { + try { + int n = Integer.parseInt(v.toString()); + return n; + } catch (NumberFormatException e) { + // ignore + } + } + } + return 2000; + } + + 
@Override + public void initializeFromJSON(Project project, JSONObject o) throws Exception { + _name = o.getString("name"); + _expression = o.getString("expression"); + _columnName = o.getString("columnName"); + _invert = o.has("invert") && o.getBoolean("invert"); + + if (_columnName.length() > 0) { + Column column = project.columnModel.getColumnByName(_columnName); + if (column != null) { + _cellIndex = column.getCellIndex(); + } else { + _errorMessage = "No column named " + _columnName; + } + } else { + _cellIndex = -1; + } + + try { + _eval = MetaParser.parse(_expression); + } catch (ParsingException e) { + _errorMessage = e.getMessage(); + } + + _selection.clear(); + + JSONArray a = o.getJSONArray("selection"); + int length = a.length(); + + for (int i = 0; i < length; i++) { + JSONObject oc = a.getJSONObject(i); + JSONObject ocv = oc.getJSONObject("v"); + + DecoratedValue decoratedValue = new DecoratedValue( + ocv.get("v"), ocv.getString("l")); + + NominalFacetChoice nominalFacetChoice = new NominalFacetChoice(decoratedValue); + nominalFacetChoice.selected = true; + + _selection.add(nominalFacetChoice); + } + + _omitBlank = JSONUtilities.getBoolean(o, "omitBlank", false); + _omitError = JSONUtilities.getBoolean(o, "omitError", false); + + _selectBlank = JSONUtilities.getBoolean(o, "selectBlank", false); + _selectError = JSONUtilities.getBoolean(o, "selectError", false); + } + + @Override + public RowFilter getRowFilter(Project project) { + return + _eval == null || + _errorMessage != null || + (_selection.size() == 0 && !_selectBlank && !_selectError) ? + null : + new ExpressionEqualRowFilter( + _eval, + _columnName, + _cellIndex, + createMatches(), + _selectBlank, + _selectError, + _invert); + } + + @Override + public RecordFilter getRecordFilter(Project project) { + RowFilter rowFilter = getRowFilter(project); + return rowFilter == null ? null : + (_invert ? 
+ new AllRowsRecordFilter(rowFilter) : + new AnyRowRecordFilter(rowFilter)); + } + + @Override + public void computeChoices(Project project, FilteredRows filteredRows) { + if (_eval != null && _errorMessage == null) { + ExpressionNominalValueGrouper grouper = + new ExpressionNominalValueGrouper(_eval, _columnName, _cellIndex); + + filteredRows.accept(project, grouper); + + postProcessGrouper(grouper); + } + } + + @Override + public void computeChoices(Project project, FilteredRecords filteredRecords) { + if (_eval != null && _errorMessage == null) { + ExpressionNominalValueGrouper grouper = + new ExpressionNominalValueGrouper(_eval, _columnName, _cellIndex); + + filteredRecords.accept(project, grouper); + + postProcessGrouper(grouper); + } + } + + protected void postProcessGrouper(ExpressionNominalValueGrouper grouper) { + _choices.clear(); + _choices.addAll(grouper.choices.values()); + + for (NominalFacetChoice choice : _selection) { + String valueString = choice.decoratedValue.value.toString(); + + if (grouper.choices.containsKey(valueString)) { + grouper.choices.get(valueString).selected = true; + } else { + /* + * A selected choice can have zero count if it is selected together + * with other choices, and some other facets' constraints eliminate + * all rows projected to this choice altogether. For example, if you + * select both "car" and "bicycle" in the "type of vehicle" facet, and + * then constrain the "wheels" facet to more than 2, then the "bicycle" + * choice now has zero count even if it's still selected. The grouper + * won't be able to detect the "bicycle" choice, so we need to inject + * that choice into the choice list ourselves. 
+ */ + choice.count = 0; + _choices.add(choice); + } + } + + _blankCount = grouper.blankCount; + _errorCount = grouper.errorCount; + } + + protected Object[] createMatches() { + Object[] a = new Object[_selection.size()]; + for (int i = 0; i < a.length; i++) { + a[i] = _selection.get(i).decoratedValue.value; + } + return a; + } +} diff --git a/main/src/com/google/refine/browsing/facets/NominalFacetChoice.java b/main/src/com/google/refine/browsing/facets/NominalFacetChoice.java new file mode 100644 index 000000000..691c9c961 --- /dev/null +++ b/main/src/com/google/refine/browsing/facets/NominalFacetChoice.java @@ -0,0 +1,32 @@ +package com.google.refine.browsing.facets; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.browsing.DecoratedValue; + +/** + * Store a facet choice that has a decorated value, a count of matched rows, + * and a flag of whether it has been selected. + */ +public class NominalFacetChoice implements Jsonizable { + final public DecoratedValue decoratedValue; + public int count; + public boolean selected; + + public NominalFacetChoice(DecoratedValue decoratedValue) { + this.decoratedValue = decoratedValue; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + writer.object(); + writer.key("v"); decoratedValue.write(writer, options); + writer.key("c"); writer.value(count); + writer.key("s"); writer.value(selected); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/browsing/facets/RangeFacet.java b/main/src/com/google/refine/browsing/facets/RangeFacet.java new file mode 100644 index 000000000..6b63cf11e --- /dev/null +++ b/main/src/com/google/refine/browsing/facets/RangeFacet.java @@ -0,0 +1,259 @@ +package com.google.refine.browsing.facets; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import 
com.google.refine.browsing.FilteredRecords; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RecordFilter; +import com.google.refine.browsing.RowFilter; +import com.google.refine.browsing.filters.AnyRowRecordFilter; +import com.google.refine.browsing.filters.ExpressionNumberComparisonRowFilter; +import com.google.refine.browsing.util.ExpressionBasedRowEvaluable; +import com.google.refine.browsing.util.ExpressionNumericValueBinner; +import com.google.refine.browsing.util.NumericBinIndex; +import com.google.refine.browsing.util.NumericBinRecordIndex; +import com.google.refine.browsing.util.NumericBinRowIndex; +import com.google.refine.browsing.util.RowEvaluable; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.ParsingException; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.util.JSONUtilities; + +public class RangeFacet implements Facet { + /* + * Configuration, from the client side + */ + protected String _name; // name of facet + protected String _expression; // expression to compute numeric value(s) per row + protected String _columnName; // column to base expression on, if any + + protected double _from; // the numeric selection + protected double _to; + + protected boolean _selectNumeric; // whether the numeric selection applies, default true + protected boolean _selectNonNumeric; + protected boolean _selectBlank; + protected boolean _selectError; + + /* + * Derived configuration data + */ + protected int _cellIndex; + protected Evaluable _eval; + protected String _errorMessage; + protected boolean _selected; // false if we're certain that all rows will match + // and there isn't any filtering to do + + /* + * Computed data, to return to the client side + */ + protected double _min; + protected double _max; + protected double _step; + protected int[] _baseBins; + protected int[] _bins; + + protected int 
_baseNumericCount; + protected int _baseNonNumericCount; + protected int _baseBlankCount; + protected int _baseErrorCount; + + protected int _numericCount; + protected int _nonNumericCount; + protected int _blankCount; + protected int _errorCount; + + public RangeFacet() { + } + + protected static final String MIN = "min"; + protected static final String MAX = "max"; + protected static final String TO = "to"; + protected static final String FROM = "from"; + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("name"); writer.value(_name); + writer.key("expression"); writer.value(_expression); + writer.key("columnName"); writer.value(_columnName); + + if (_errorMessage != null) { + writer.key("error"); writer.value(_errorMessage); + } else { + if (!Double.isInfinite(_min) && !Double.isInfinite(_max)) { + writer.key(MIN); writer.value(_min); + writer.key(MAX); writer.value(_max); + writer.key("step"); writer.value(_step); + + writer.key("bins"); writer.array(); + for (int b : _bins) { + writer.value(b); + } + writer.endArray(); + + writer.key("baseBins"); writer.array(); + for (int b : _baseBins) { + writer.value(b); + } + writer.endArray(); + + writer.key(FROM); writer.value(_from); + writer.key(TO); writer.value(_to); + } + + writer.key("baseNumericCount"); writer.value(_baseNumericCount); + writer.key("baseNonNumericCount"); writer.value(_baseNonNumericCount); + writer.key("baseBlankCount"); writer.value(_baseBlankCount); + writer.key("baseErrorCount"); writer.value(_baseErrorCount); + + writer.key("numericCount"); writer.value(_numericCount); + writer.key("nonNumericCount"); writer.value(_nonNumericCount); + writer.key("blankCount"); writer.value(_blankCount); + writer.key("errorCount"); writer.value(_errorCount); + } + writer.endObject(); + } + + public void initializeFromJSON(Project project, JSONObject o) throws Exception { + _name = o.getString("name"); + _expression = o.getString("expression"); 
+ _columnName = o.getString("columnName"); + + if (_columnName.length() > 0) { + Column column = project.columnModel.getColumnByName(_columnName); + if (column != null) { + _cellIndex = column.getCellIndex(); + } else { + _errorMessage = "No column named " + _columnName; + } + } else { + _cellIndex = -1; + } + + try { + _eval = MetaParser.parse(_expression); + } catch (ParsingException e) { + _errorMessage = e.getMessage(); + } + + if (o.has(FROM) || o.has(TO)) { + _from = o.has(FROM) ? o.getDouble(FROM) : _min; + _to = o.has(TO) ? o.getDouble(TO) : _max; + _selected = true; + } + + _selectNumeric = JSONUtilities.getBoolean(o, "selectNumeric", true); + _selectNonNumeric = JSONUtilities.getBoolean(o, "selectNonNumeric", true); + _selectBlank = JSONUtilities.getBoolean(o, "selectBlank", true); + _selectError = JSONUtilities.getBoolean(o, "selectError", true); + + if (!_selectNumeric || !_selectNonNumeric || !_selectBlank || !_selectError) { + _selected = true; + } + } + + public RowFilter getRowFilter(Project project) { + if (_eval != null && _errorMessage == null && _selected) { + return new ExpressionNumberComparisonRowFilter( + getRowEvaluable(project), _selectNumeric, _selectNonNumeric, _selectBlank, _selectError) { + + protected boolean checkValue(double d) { + return d >= _from && d < _to; + }; + }; + } else { + return null; + } + } + + @Override + public RecordFilter getRecordFilter(Project project) { + RowFilter rowFilter = getRowFilter(project); + return rowFilter == null ? 
null : new AnyRowRecordFilter(rowFilter); + } + + public void computeChoices(Project project, FilteredRows filteredRows) { + if (_eval != null && _errorMessage == null) { + RowEvaluable rowEvaluable = getRowEvaluable(project); + + Column column = project.columnModel.getColumnByCellIndex(_cellIndex); + String key = "numeric-bin:row-based:" + _expression; + NumericBinIndex index = (NumericBinIndex) column.getPrecompute(key); + if (index == null) { + index = new NumericBinRowIndex(project, rowEvaluable); + column.setPrecompute(key, index); + } + + retrieveDataFromBaseBinIndex(index); + + ExpressionNumericValueBinner binner = + new ExpressionNumericValueBinner(rowEvaluable, index); + + filteredRows.accept(project, binner); + retrieveDataFromBinner(binner); + } + } + + public void computeChoices(Project project, FilteredRecords filteredRecords) { + if (_eval != null && _errorMessage == null) { + RowEvaluable rowEvaluable = getRowEvaluable(project); + + Column column = project.columnModel.getColumnByCellIndex(_cellIndex); + String key = "numeric-bin:record-based:" + _expression; + NumericBinIndex index = (NumericBinIndex) column.getPrecompute(key); + if (index == null) { + index = new NumericBinRecordIndex(project, rowEvaluable); + column.setPrecompute(key, index); + } + + retrieveDataFromBaseBinIndex(index); + + ExpressionNumericValueBinner binner = + new ExpressionNumericValueBinner(rowEvaluable, index); + + filteredRecords.accept(project, binner); + + retrieveDataFromBinner(binner); + } + } + + protected RowEvaluable getRowEvaluable(Project project) { + return new ExpressionBasedRowEvaluable(_columnName, _cellIndex, _eval); + } + + protected void retrieveDataFromBaseBinIndex(NumericBinIndex index) { + _min = index.getMin(); + _max = index.getMax(); + _step = index.getStep(); + _baseBins = index.getBins(); + + _baseNumericCount = index.getNumericRowCount(); + _baseNonNumericCount = index.getNonNumericRowCount(); + _baseBlankCount = index.getBlankRowCount(); + 
_baseErrorCount = index.getErrorRowCount(); + + if (_selected) { + _from = Math.max(_from, _min); + _to = Math.min(_to, _max); + } else { + _from = _min; + _to = _max; + } + } + + protected void retrieveDataFromBinner(ExpressionNumericValueBinner binner) { + _bins = binner.bins; + _numericCount = binner.numericCount; + _nonNumericCount = binner.nonNumericCount; + _blankCount = binner.blankCount; + _errorCount = binner.errorCount; + } +} diff --git a/main/src/com/google/refine/browsing/facets/ScatterplotDrawingRowVisitor.java b/main/src/com/google/refine/browsing/facets/ScatterplotDrawingRowVisitor.java new file mode 100644 index 000000000..749bb5540 --- /dev/null +++ b/main/src/com/google/refine/browsing/facets/ScatterplotDrawingRowVisitor.java @@ -0,0 +1,133 @@ +package com.google.refine.browsing.facets; + +import java.awt.BasicStroke; +import java.awt.Color; +import java.awt.Graphics2D; +import java.awt.RenderingHints; +import java.awt.geom.AffineTransform; +import java.awt.geom.Point2D; +import java.awt.geom.Rectangle2D; +import java.awt.image.BufferedImage; +import java.awt.image.RenderedImage; + +import com.google.refine.browsing.RecordVisitor; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.model.Row; + +public class ScatterplotDrawingRowVisitor implements RowVisitor, RecordVisitor { + + int col_x; + int col_y; + int dim_x; + int dim_y; + int rotation; + + double l; + double dot; + + double min_x; + double max_x; + double min_y; + double max_y; + + BufferedImage image; + Graphics2D g2; + + AffineTransform r; + + public ScatterplotDrawingRowVisitor( + int col_x, int col_y, double min_x, double max_x, double min_y, double max_y, + int size, int dim_x, int dim_y, int rotation, double dot, Color color) + { + this.col_x = col_x; + this.col_y = col_y; + this.min_x = min_x; + this.min_y = min_y; + this.max_x = max_x; + 
this.max_y = max_y; + this.dot = dot; + this.dim_x = dim_x; + this.dim_y = dim_y; + this.rotation = rotation; + + l = (double) size; + r = ScatterplotFacet.createRotationMatrix(rotation, l); + + image = new BufferedImage(size, size, BufferedImage.TYPE_4BYTE_ABGR); + g2 = (Graphics2D) image.getGraphics(); + g2.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); + g2.setStroke(new BasicStroke(1.0f)); + + AffineTransform t = AffineTransform.getTranslateInstance(0, l); + t.scale(1, -1); + + g2.setTransform(t); + g2.setColor(color); + g2.setPaint(color); + + if (r != null) { + /* + * Fill in the negative quadrants to give a hint of how the plot has been rotated. + */ + Graphics2D g2r = (Graphics2D) g2.create(); + g2r.transform(r); + + g2r.setPaint(Color.lightGray); + g2r.fillRect(-size, 0, size, size); + g2r.fillRect(0, -size, size, size); + g2r.dispose(); + } + } + + public void setColor(Color color) { + g2.setColor(color); + g2.setPaint(color); + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + Cell cellx = row.getCell(col_x); + Cell celly = row.getCell(col_y); + if ((cellx != null && cellx.value != null && cellx.value instanceof Number) && + (celly != null && celly.value != null && celly.value instanceof Number)) + { + double xv = ((Number) cellx.value).doubleValue(); + double yv = ((Number) celly.value).doubleValue(); + + Point2D.Double p = new Point2D.Double(xv,yv); + + p = ScatterplotFacet.translateCoordinates( + p, min_x, max_x, min_y, max_y, dim_x, dim_y, l, r); + + g2.fill(new Rectangle2D.Double(p.x - dot / 2, p.y - dot / 2, dot, dot)); + } + + return false; + } + + @Override + public boolean visit(Project project, Record record) { + for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { + visit(project, r, project.rows.get(r)); + } + 
return false; + } + + public RenderedImage getImage() { + return image; + } +} + diff --git a/main/src/com/google/refine/browsing/facets/ScatterplotFacet.java b/main/src/com/google/refine/browsing/facets/ScatterplotFacet.java new file mode 100644 index 000000000..52b507164 --- /dev/null +++ b/main/src/com/google/refine/browsing/facets/ScatterplotFacet.java @@ -0,0 +1,452 @@ +package com.google.refine.browsing.facets; + +import java.awt.Color; +import java.awt.geom.AffineTransform; +import java.awt.geom.Point2D; +import java.awt.image.BufferedImage; +import java.awt.image.RenderedImage; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Properties; + +import javax.imageio.ImageIO; + +import org.apache.commons.codec.binary.Base64; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.browsing.FilteredRecords; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RecordFilter; +import com.google.refine.browsing.RowFilter; +import com.google.refine.browsing.filters.AnyRowRecordFilter; +import com.google.refine.browsing.filters.DualExpressionsNumberComparisonRowFilter; +import com.google.refine.browsing.util.ExpressionBasedRowEvaluable; +import com.google.refine.browsing.util.NumericBinIndex; +import com.google.refine.browsing.util.NumericBinRecordIndex; +import com.google.refine.browsing.util.NumericBinRowIndex; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.ParsingException; +import com.google.refine.model.Column; +import com.google.refine.model.Project; + +public class ScatterplotFacet implements Facet { + + public static final int LIN = 0; + public static final int LOG = 1; + + public static final int NO_ROTATION = 0; + public static final int ROTATE_CW = 1; + public static final int ROTATE_CCW = 2; + + /* + * 
Configuration, from the client side + */ + protected String name; // name of facet + + protected String expression_x; // expression to compute the x numeric value(s) per row + protected String expression_y; // expression to compute the y numeric value(s) per row + protected String columnName_x; // column to base the x expression on, if any + protected String columnName_y; // column to base the y expression on, if any + + protected int size; + protected int dim_x; + protected int dim_y; + protected int rotation; + + protected double l; + protected double dot; + + protected String image; + + protected String color_str; + protected Color color; + + protected double from_x; // the numeric selection for the x axis, from 0 to 1 + protected double to_x; + protected double from_y; // the numeric selection for the y axis, from 0 to 1 + protected double to_y; + + /* + * Derived configuration data + */ + protected int columnIndex_x; + protected int columnIndex_y; + protected Evaluable eval_x; + protected Evaluable eval_y; + protected String errorMessage_x; + protected String errorMessage_y; + + protected double min_x; + protected double max_x; + protected double min_y; + protected double max_y; + protected AffineTransform t; + + protected boolean selected; // false if we're certain that all rows will match + // and there isn't any filtering to do + + public static final String NAME = "name"; + public static final String IMAGE = "image"; + public static final String COLOR = "color"; + public static final String BASE_COLOR = "base_color"; + public static final String SIZE = "l"; + public static final String ROTATION = "r"; + public static final String DOT = "dot"; + public static final String DIM_X = "dim_x"; + public static final String DIM_Y = "dim_y"; + + public static final String X_COLUMN_NAME = "cx"; + public static final String X_EXPRESSION = "ex"; + public static final String MIN_X = "min_x"; + public static final String MAX_X = "max_x"; + public static final String 
TO_X = "to_x"; + public static final String FROM_X = "from_x"; + public static final String ERROR_X = "error_x"; + + public static final String Y_COLUMN_NAME = "cy"; + public static final String Y_EXPRESSION = "ey"; + public static final String MIN_Y = "min_y"; + public static final String MAX_Y = "max_y"; + public static final String TO_Y = "to_y"; + public static final String FROM_Y = "from_y"; + public static final String ERROR_Y = "error_y"; + + private static final boolean IMAGE_URI = false; + + public static String EMPTY_IMAGE; + + final static Logger logger = LoggerFactory.getLogger("scatterplot_facet"); + + static { + try { + EMPTY_IMAGE = serializeImage(new BufferedImage(1, 1, BufferedImage.TYPE_4BYTE_ABGR)); + } catch (IOException e) { + EMPTY_IMAGE = ""; + } + } + + public void write(JSONWriter writer, Properties options) throws JSONException { + + writer.object(); + + writer.key(NAME); writer.value(name); + writer.key(X_COLUMN_NAME); writer.value(columnName_x); + writer.key(X_EXPRESSION); writer.value(expression_x); + writer.key(Y_COLUMN_NAME); writer.value(columnName_y); + writer.key(Y_EXPRESSION); writer.value(expression_y); + writer.key(SIZE); writer.value(size); + writer.key(DOT); writer.value(dot); + writer.key(ROTATION); writer.value(rotation); + writer.key(DIM_X); writer.value(dim_x); + writer.key(DIM_Y); writer.value(dim_y); + writer.key(COLOR); writer.value(color_str); + + if (IMAGE_URI) { + writer.key(IMAGE); writer.value(image); + } + + if (errorMessage_x != null) { + writer.key(ERROR_X); writer.value(errorMessage_x); + } else { + if (!Double.isInfinite(min_x) && !Double.isInfinite(max_x)) { + writer.key(FROM_X); writer.value(from_x); + writer.key(TO_X); writer.value(to_x); + } + } + + if (errorMessage_y != null) { + writer.key(ERROR_Y); writer.value(errorMessage_y); + } else { + if (!Double.isInfinite(min_y) && !Double.isInfinite(max_y)) { + writer.key(FROM_Y); writer.value(from_y); + writer.key(TO_Y); writer.value(to_y); + } + } + + 
writer.endObject(); + } + + public void initializeFromJSON(Project project, JSONObject o) throws Exception { + name = o.getString(NAME); + l = size = (o.has(SIZE)) ? o.getInt(SIZE) : 100; + dot = (o.has(DOT)) ? o.getInt(DOT) : 0.5d; + + dim_x = (o.has(DIM_X)) ? getAxisDim(o.getString(DIM_X)) : LIN; + if (o.has(FROM_X) && o.has(TO_X)) { + from_x = o.getDouble(FROM_X); + to_x = o.getDouble(TO_X); + selected = true; + } else { + from_x = 0; + to_x = 1; + } + + dim_y = (o.has(DIM_Y)) ? getAxisDim(o.getString(DIM_Y)) : LIN; + if (o.has(FROM_Y) && o.has(TO_Y)) { + from_y = o.getDouble(FROM_Y); + to_y = o.getDouble(TO_Y); + selected = true; + } else { + from_y = 0; + to_y = 1; + } + + rotation = (o.has(ROTATION)) ? getRotation(o.getString(ROTATION)) : NO_ROTATION; + t = createRotationMatrix(rotation, l); + + color_str = (o.has(COLOR)) ? o.getString(COLOR) : "000000"; + color = new Color(Integer.parseInt(color_str,16)); + + columnName_x = o.getString(X_COLUMN_NAME); + expression_x = o.getString(X_EXPRESSION); + + if (columnName_x.length() > 0) { + Column x_column = project.columnModel.getColumnByName(columnName_x); + if (x_column != null) { + columnIndex_x = x_column.getCellIndex(); + + NumericBinIndex index_x = ScatterplotFacet.getBinIndex(project, x_column, eval_x, expression_x); + min_x = index_x.getMin(); + max_x = index_x.getMax(); + } else { + errorMessage_x = "No column named " + columnName_x; + } + } else { + columnIndex_x = -1; + } + + try { + eval_x = MetaParser.parse(expression_x); + } catch (ParsingException e) { + errorMessage_x = e.getMessage(); + } + + columnName_y = o.getString(Y_COLUMN_NAME); + expression_y = o.getString(Y_EXPRESSION); + + if (columnName_y.length() > 0) { + Column y_column = project.columnModel.getColumnByName(columnName_y); + if (y_column != null) { + columnIndex_y = y_column.getCellIndex(); + + NumericBinIndex index_y = ScatterplotFacet.getBinIndex(project, y_column, eval_y, expression_y); + min_y = index_y.getMin(); + max_y = 
index_y.getMax(); + } else { + errorMessage_y = "No column named " + columnName_y; + } + } else { + columnIndex_y = -1; + } + + try { + eval_y = MetaParser.parse(expression_y); + } catch (ParsingException e) { + errorMessage_y = e.getMessage(); + } + + } + + public RowFilter getRowFilter(Project project) { + if (selected && + eval_x != null && errorMessage_x == null && + eval_y != null && errorMessage_y == null) + { + return new DualExpressionsNumberComparisonRowFilter( + eval_x, columnName_x, columnIndex_x, eval_y, columnName_y, columnIndex_y) { + + double from_x_pixels = from_x * l; + double to_x_pixels = to_x * l; + double from_y_pixels = from_y * l; + double to_y_pixels = to_y * l; + + protected boolean checkValues(double x, double y) { + Point2D.Double p = new Point2D.Double(x,y); + p = translateCoordinates(p, min_x, max_x, min_y, max_y, dim_x, dim_y, l, t); + return p.x >= from_x_pixels && p.x <= to_x_pixels && p.y >= from_y_pixels && p.y <= to_y_pixels; + }; + }; + } else { + return null; + } + } + + @Override + public RecordFilter getRecordFilter(Project project) { + RowFilter rowFilter = getRowFilter(project); + return rowFilter == null ? 
null : new AnyRowRecordFilter(rowFilter); + } + + public void computeChoices(Project project, FilteredRows filteredRows) { + if (eval_x != null && eval_y != null && errorMessage_x == null && errorMessage_y == null) { + Column column_x = project.columnModel.getColumnByCellIndex(columnIndex_x); + NumericBinIndex index_x = getBinIndex(project, column_x, eval_x, expression_x, "row-based"); + + Column column_y = project.columnModel.getColumnByCellIndex(columnIndex_y); + NumericBinIndex index_y = getBinIndex(project, column_y, eval_y, expression_y, "row-based"); + + retrieveDataFromBinIndices(index_x, index_y); + + if (IMAGE_URI) { + if (index_x.isNumeric() && index_y.isNumeric()) { + ScatterplotDrawingRowVisitor drawer = new ScatterplotDrawingRowVisitor( + columnIndex_x, columnIndex_y, min_x, max_x, min_y, max_y, + size, dim_x, dim_y, rotation, dot, color + ); + filteredRows.accept(project, drawer); + + try { + image = serializeImage(drawer.getImage()); + } catch (IOException e) { + logger.warn("Exception caught while generating the image", e); + } + } else { + image = EMPTY_IMAGE; + } + } + } + } + + public void computeChoices(Project project, FilteredRecords filteredRecords) { + if (eval_x != null && eval_y != null && errorMessage_x == null && errorMessage_y == null) { + Column column_x = project.columnModel.getColumnByCellIndex(columnIndex_x); + NumericBinIndex index_x = getBinIndex(project, column_x, eval_x, expression_x, "record-based"); + + Column column_y = project.columnModel.getColumnByCellIndex(columnIndex_y); + NumericBinIndex index_y = getBinIndex(project, column_y, eval_y, expression_y, "record-based"); + + retrieveDataFromBinIndices(index_x, index_y); + + if (IMAGE_URI) { + if (index_x.isNumeric() && index_y.isNumeric()) { + ScatterplotDrawingRowVisitor drawer = new ScatterplotDrawingRowVisitor( + columnIndex_x, columnIndex_y, min_x, max_x, min_y, max_y, + size, dim_x, dim_y, rotation, dot, color + ); + filteredRecords.accept(project, drawer); + + try { + 
image = serializeImage(drawer.getImage()); + } catch (IOException e) { + logger.warn("Exception caught while generating the image", e); + } + } else { + image = EMPTY_IMAGE; + } + } + } + } + + protected void retrieveDataFromBinIndices(NumericBinIndex index_x, NumericBinIndex index_y) { + min_x = index_x.getMin(); + max_x = index_x.getMax(); + + min_y = index_y.getMin(); + max_y = index_y.getMax(); + } + + public static String serializeImage(RenderedImage image) throws IOException { + ByteArrayOutputStream output = new ByteArrayOutputStream(4096); + ImageIO.write(image, "png", output); + output.close(); + String encoded = Base64.encodeBase64String(output.toByteArray()); + String url = "data:image/png;base64," + encoded; + return url; + } + + public static int getAxisDim(String type) { + return ("log".equals(type.toLowerCase())) ? LOG : LIN; + } + + public static int getRotation(String rotation) { + rotation = rotation.toLowerCase(); + if ("cw".equals(rotation) || "right".equals(rotation)) { + return ScatterplotFacet.ROTATE_CW; + } else if ("ccw".equals(rotation) || "left".equals(rotation)) { + return ScatterplotFacet.ROTATE_CCW; + } else { + return NO_ROTATION; + } + } + + public static NumericBinIndex getBinIndex(Project project, Column column, Evaluable eval, String expression) { + return getBinIndex(project, column, eval, expression, "row-based"); + } + + public static NumericBinIndex getBinIndex(Project project, Column column, Evaluable eval, String expression, String mode) { + String key = "numeric-bin:" + mode + ":" + expression; + if (eval == null) { + try { + eval = MetaParser.parse(expression); + } catch (ParsingException e) { + logger.warn("Error parsing expression",e); + } + } + NumericBinIndex index = (NumericBinIndex) column.getPrecompute(key); + if (index == null) { + index = "row-based".equals(mode) ? 
+ new NumericBinRowIndex(project, new ExpressionBasedRowEvaluable(column.getName(), column.getCellIndex(), eval)) : + new NumericBinRecordIndex(project, new ExpressionBasedRowEvaluable(column.getName(), column.getCellIndex(), eval)); + + column.setPrecompute(key, index); + } + return index; + } + + private static double s_rotateScale = 1 / Math.sqrt(2.0); + + public static AffineTransform createRotationMatrix(int rotation, double l) { + if (rotation == ScatterplotFacet.ROTATE_CW) { + AffineTransform t = AffineTransform.getTranslateInstance(0, l / 2); + t.scale(s_rotateScale, s_rotateScale); + t.rotate(-Math.PI / 4); + return t; + } else if (rotation == ScatterplotFacet.ROTATE_CCW) { + AffineTransform t = AffineTransform.getTranslateInstance(l / 2, 0); + t.scale(s_rotateScale, s_rotateScale); + t.rotate(Math.PI / 4); + return t; + } else { + return null; + } + } + + public static Point2D.Double translateCoordinates( + Point2D.Double p, + double min_x, double max_x, double min_y, double max_y, + int dim_x, int dim_y, double l, AffineTransform t) { + + double x = p.x; + double y = p.y; + + double relative_x = x - min_x; + double range_x = max_x - min_x; + if (dim_x == ScatterplotFacet.LOG) { + x = Math.log10(relative_x + 1) * l / Math.log10(range_x + 1); + } else { + x = relative_x * l / range_x; + } + + double relative_y = y - min_y; + double range_y = max_y - min_y; + if (dim_y == ScatterplotFacet.LOG) { + y = Math.log10(relative_y + 1) * l / Math.log10(range_y + 1); + } else { + y = relative_y * l / range_y; + } + + p.x = x; + p.y = y; + if (t != null) { + t.transform(p, p); + } + + return p; + } + +} diff --git a/main/src/com/google/refine/browsing/facets/TextSearchFacet.java b/main/src/com/google/refine/browsing/facets/TextSearchFacet.java new file mode 100644 index 000000000..e97649a90 --- /dev/null +++ b/main/src/com/google/refine/browsing/facets/TextSearchFacet.java @@ -0,0 +1,120 @@ +package com.google.refine.browsing.facets; + +import java.util.Properties; 
+import java.util.regex.Pattern; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.FilteredRecords; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RecordFilter; +import com.google.refine.browsing.RowFilter; +import com.google.refine.browsing.filters.AnyRowRecordFilter; +import com.google.refine.browsing.filters.ExpressionStringComparisonRowFilter; +import com.google.refine.expr.Evaluable; +import com.google.refine.gel.ast.VariableExpr; +import com.google.refine.model.Project; + +public class TextSearchFacet implements Facet { + /* + * Configuration + */ + protected String _name; + protected String _columnName; + protected String _query; + protected String _mode; + protected boolean _caseSensitive; + + /* + * Derived configuration + */ + protected int _cellIndex; + protected Pattern _pattern; + + public TextSearchFacet() { + } + + @Override + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("name"); writer.value(_name); + writer.key("columnName"); writer.value(_columnName); + writer.key("query"); writer.value(_query); + writer.key("mode"); writer.value(_mode); + writer.key("caseSensitive"); writer.value(_caseSensitive); + writer.endObject(); + } + + @Override + public void initializeFromJSON(Project project, JSONObject o) throws Exception { + _name = o.getString("name"); + _columnName = o.getString("columnName"); + + _cellIndex = project.columnModel.getColumnByName(_columnName).getCellIndex(); + + if (!o.isNull("query")) { + _query = o.getString("query"); + } + + _mode = o.getString("mode"); + _caseSensitive = o.getBoolean("caseSensitive"); + if (_query != null) { + if ("regex".equals(_mode)) { + try { + _pattern = Pattern.compile( + _query, + _caseSensitive ? 
0 : Pattern.CASE_INSENSITIVE); + } catch (java.util.regex.PatternSyntaxException e) { + e.printStackTrace(); + } + } else if (!_caseSensitive) { + _query = _query.toLowerCase(); + } + } + } + + @Override + public RowFilter getRowFilter(Project project) { + if (_query == null || _query.length() == 0) { + return null; + } else if ("regex".equals(_mode) && _pattern == null) { + return null; + } + + Evaluable eval = new VariableExpr("value"); + + if ("regex".equals(_mode)) { + return new ExpressionStringComparisonRowFilter(eval, _columnName, _cellIndex) { + protected boolean checkValue(String s) { + return _pattern.matcher(s).find(); + }; + }; + } else { + return new ExpressionStringComparisonRowFilter(eval, _columnName, _cellIndex) { + protected boolean checkValue(String s) { + return (_caseSensitive ? s : s.toLowerCase()).contains(_query); + }; + }; + } + } + + @Override + public RecordFilter getRecordFilter(Project project) { + RowFilter rowFilter = getRowFilter(project); + return rowFilter == null ? 
null : new AnyRowRecordFilter(rowFilter); + } + + @Override + public void computeChoices(Project project, FilteredRows filteredRows) { + // nothing to do + } + + @Override + public void computeChoices(Project project, FilteredRecords filteredRecords) { + // nothing to do + } +} diff --git a/main/src/com/google/refine/browsing/facets/TimeRangeFacet.java b/main/src/com/google/refine/browsing/facets/TimeRangeFacet.java new file mode 100644 index 000000000..5944ac587 --- /dev/null +++ b/main/src/com/google/refine/browsing/facets/TimeRangeFacet.java @@ -0,0 +1,201 @@ +package com.google.refine.browsing.facets; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.FilteredRecords; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowFilter; +import com.google.refine.browsing.filters.ExpressionTimeComparisonRowFilter; +import com.google.refine.browsing.util.ExpressionTimeValueBinner; +import com.google.refine.browsing.util.RowEvaluable; +import com.google.refine.browsing.util.TimeBinIndex; +import com.google.refine.browsing.util.TimeBinRecordIndex; +import com.google.refine.browsing.util.TimeBinRowIndex; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.ParsingException; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.util.JSONUtilities; + +public class TimeRangeFacet extends RangeFacet { + + protected boolean _selectTime; // whether the time selection applies, default true + protected boolean _selectNonTime; + + protected int _baseTimeCount; + protected int _baseNonTimeCount; + + protected int _timeCount; + protected int _nonTimeCount; + + public void write(JSONWriter writer, Properties options) throws JSONException { + + writer.object(); + writer.key("name"); writer.value(_name); + writer.key("expression"); writer.value(_expression); + 
writer.key("columnName"); writer.value(_columnName); + + if (_errorMessage != null) { + writer.key("error"); writer.value(_errorMessage); + } else { + if (!Double.isInfinite(_min) && !Double.isInfinite(_max)) { + writer.key(MIN); writer.value(_min); + writer.key(MAX); writer.value(_max); + writer.key("step"); writer.value(_step); + + writer.key("bins"); writer.array(); + for (int b : _bins) { + writer.value(b); + } + writer.endArray(); + + writer.key("baseBins"); writer.array(); + for (int b : _baseBins) { + writer.value(b); + } + writer.endArray(); + + writer.key(FROM); writer.value(_from); + writer.key(TO); writer.value(_to); + } + + writer.key("baseTimeCount"); writer.value(_baseTimeCount); + writer.key("baseNonTimeCount"); writer.value(_baseNonTimeCount); + writer.key("baseBlankCount"); writer.value(_baseBlankCount); + writer.key("baseErrorCount"); writer.value(_baseErrorCount); + + writer.key("timeCount"); writer.value(_timeCount); + writer.key("nonTimeCount"); writer.value(_nonTimeCount); + writer.key("blankCount"); writer.value(_blankCount); + writer.key("errorCount"); writer.value(_errorCount); + } + writer.endObject(); + } + + public void initializeFromJSON(Project project, JSONObject o) throws Exception { + _name = o.getString("name"); + _expression = o.getString("expression"); + _columnName = o.getString("columnName"); + + if (_columnName.length() > 0) { + Column column = project.columnModel.getColumnByName(_columnName); + if (column != null) { + _cellIndex = column.getCellIndex(); + } else { + _errorMessage = "No column named " + _columnName; + } + } else { + _cellIndex = -1; + } + + try { + _eval = MetaParser.parse(_expression); + } catch (ParsingException e) { + _errorMessage = e.getMessage(); + } + + if (o.has(FROM) || o.has(TO)) { + _from = o.has(FROM) ? o.getDouble(FROM) : _min; + _to = o.has(TO) ? 
o.getDouble(TO) : _max; + _selected = true; + } + + _selectTime = JSONUtilities.getBoolean(o, "selectTime", true); + _selectNonTime = JSONUtilities.getBoolean(o, "selectNonTime", true); + _selectBlank = JSONUtilities.getBoolean(o, "selectBlank", true); + _selectError = JSONUtilities.getBoolean(o, "selectError", true); + + if (!_selectTime || !_selectNonTime || !_selectBlank || !_selectError) { + _selected = true; + } + } + + public RowFilter getRowFilter(Project project) { + if (_eval != null && _errorMessage == null && _selected) { + return new ExpressionTimeComparisonRowFilter( + getRowEvaluable(project), _selectTime, _selectNonTime, _selectBlank, _selectError) { + + protected boolean checkValue(long t) { + return t >= _from && t < _to; + }; + }; + } else { + return null; + } + } + + public void computeChoices(Project project, FilteredRows filteredRows) { + if (_eval != null && _errorMessage == null) { + RowEvaluable rowEvaluable = getRowEvaluable(project); + + Column column = project.columnModel.getColumnByCellIndex(_cellIndex); + String key = "time-bin:row-based:" + _expression; + TimeBinIndex index = (TimeBinIndex) column.getPrecompute(key); + if (index == null) { + index = new TimeBinRowIndex(project, rowEvaluable); + column.setPrecompute(key, index); + } + + retrieveDataFromBaseBinIndex(index); + + ExpressionTimeValueBinner binner = new ExpressionTimeValueBinner(rowEvaluable, index); + + filteredRows.accept(project, binner); + retrieveDataFromBinner(binner); + } + } + + public void computeChoices(Project project, FilteredRecords filteredRecords) { + if (_eval != null && _errorMessage == null) { + RowEvaluable rowEvaluable = getRowEvaluable(project); + + Column column = project.columnModel.getColumnByCellIndex(_cellIndex); + String key = "time-bin:record-based:" + _expression; + TimeBinIndex index = (TimeBinIndex) column.getPrecompute(key); + if (index == null) { + index = new TimeBinRecordIndex(project, rowEvaluable); + column.setPrecompute(key, index); + } 
+ + retrieveDataFromBaseBinIndex(index); + + ExpressionTimeValueBinner binner = new ExpressionTimeValueBinner(rowEvaluable, index); + + filteredRecords.accept(project, binner); + + retrieveDataFromBinner(binner); + } + } + + protected void retrieveDataFromBaseBinIndex(TimeBinIndex index) { + _min = index.getMin(); + _max = index.getMax(); + _step = index.getStep(); + _baseBins = index.getBins(); + + _baseTimeCount = index.getTimeRowCount(); + _baseNonTimeCount = index.getNonTimeRowCount(); + _baseBlankCount = index.getBlankRowCount(); + _baseErrorCount = index.getErrorRowCount(); + + if (_selected) { + _from = Math.max(_from, _min); + _to = Math.min(_to, _max); + } else { + _from = _min; + _to = _max; + } + } + + protected void retrieveDataFromBinner(ExpressionTimeValueBinner binner) { + _bins = binner.bins; + _timeCount = binner.timeCount; + _nonTimeCount = binner.nonTimeCount; + _blankCount = binner.blankCount; + _errorCount = binner.errorCount; + } +} diff --git a/main/src/com/google/refine/browsing/filters/AllRowsRecordFilter.java b/main/src/com/google/refine/browsing/filters/AllRowsRecordFilter.java new file mode 100644 index 000000000..748500f9c --- /dev/null +++ b/main/src/com/google/refine/browsing/filters/AllRowsRecordFilter.java @@ -0,0 +1,24 @@ +package com.google.refine.browsing.filters; + +import com.google.refine.browsing.RecordFilter; +import com.google.refine.browsing.RowFilter; +import com.google.refine.model.Project; +import com.google.refine.model.Record; + +public class AllRowsRecordFilter implements RecordFilter { + final protected RowFilter _rowFilter; + + public AllRowsRecordFilter(RowFilter rowFilter) { + _rowFilter = rowFilter; + } + + @Override + public boolean filterRecord(Project project, Record record) { + for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { + if (!_rowFilter.filterRow(project, r, project.rows.get(r))) { + return false; + } + } + return true; + } +} diff --git 
a/main/src/com/google/refine/browsing/filters/AnyRowRecordFilter.java b/main/src/com/google/refine/browsing/filters/AnyRowRecordFilter.java new file mode 100644 index 000000000..0849b943f --- /dev/null +++ b/main/src/com/google/refine/browsing/filters/AnyRowRecordFilter.java @@ -0,0 +1,24 @@ +package com.google.refine.browsing.filters; + +import com.google.refine.browsing.RecordFilter; +import com.google.refine.browsing.RowFilter; +import com.google.refine.model.Project; +import com.google.refine.model.Record; + +public class AnyRowRecordFilter implements RecordFilter { + final protected RowFilter _rowFilter; + + public AnyRowRecordFilter(RowFilter rowFilter) { + _rowFilter = rowFilter; + } + + @Override + public boolean filterRecord(Project project, Record record) { + for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { + if (_rowFilter.filterRow(project, r, project.rows.get(r))) { + return true; + } + } + return false; + } +} diff --git a/main/src/com/google/refine/browsing/filters/DualExpressionsNumberComparisonRowFilter.java b/main/src/com/google/refine/browsing/filters/DualExpressionsNumberComparisonRowFilter.java new file mode 100644 index 000000000..06688ad19 --- /dev/null +++ b/main/src/com/google/refine/browsing/filters/DualExpressionsNumberComparisonRowFilter.java @@ -0,0 +1,85 @@ +package com.google.refine.browsing.filters; + +import java.util.Collection; +import java.util.Properties; + +import com.google.refine.browsing.RowFilter; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +/** + * Judge if a row matches by evaluating two given expressions on the row, based on two different columns + * and checking the results. It's a match if the result satisfies some numeric comparisons. 
+ */ +abstract public class DualExpressionsNumberComparisonRowFilter implements RowFilter { + + final protected Evaluable _x_evaluable; + final protected String _x_columnName; + final protected int _x_cellIndex; + final protected Evaluable _y_evaluable; + final protected String _y_columnName; + final protected int _y_cellIndex; + + public DualExpressionsNumberComparisonRowFilter ( + Evaluable x_evaluable, + String x_columnName, + int x_cellIndex, + Evaluable y_evaluable, + String y_columnName, + int y_cellIndex + ) { + _x_evaluable = x_evaluable; + _x_columnName = x_columnName; + _x_cellIndex = x_cellIndex; + _y_evaluable = y_evaluable; + _y_columnName = y_columnName; + _y_cellIndex = y_cellIndex; + } + + public boolean filterRow(Project project, int rowIndex, Row row) { + Cell x_cell = _x_cellIndex < 0 ? null : row.getCell(_x_cellIndex); + Properties x_bindings = ExpressionUtils.createBindings(project); + ExpressionUtils.bind(x_bindings, row, rowIndex, _x_columnName, x_cell); + Object x_value = _x_evaluable.evaluate(x_bindings); + + Cell y_cell = _y_cellIndex < 0 ? 
null : row.getCell(_y_cellIndex); + Properties y_bindings = ExpressionUtils.createBindings(project); + ExpressionUtils.bind(y_bindings, row, rowIndex, _y_columnName, y_cell); + Object y_value = _y_evaluable.evaluate(y_bindings); + + if (x_value != null && y_value != null) { + if (x_value.getClass().isArray() || y_value.getClass().isArray()) { + return false; + } else if (x_value instanceof Collection || y_value instanceof Collection) { + return false; + } // else, fall through + } + + return checkValue(x_value,y_value); + } + + protected boolean checkValue(Object vx, Object vy) { + if (ExpressionUtils.isError(vx) || ExpressionUtils.isError(vy)) { + return false; + } else if (ExpressionUtils.isNonBlankData(vx) && ExpressionUtils.isNonBlankData(vy)) { + if (vx instanceof Number && vy instanceof Number) { + double dx = ((Number) vx).doubleValue(); + double dy = ((Number) vy).doubleValue(); + return (!Double.isInfinite(dx) && + !Double.isNaN(dx) && + !Double.isInfinite(dy) && + !Double.isNaN(dy) && + checkValues(dx,dy)); + } else { + return false; + } + } else { + return false; + } + } + + abstract protected boolean checkValues(double dx, double dy); +} diff --git a/main/src/com/google/refine/browsing/filters/ExpressionEqualRowFilter.java b/main/src/com/google/refine/browsing/filters/ExpressionEqualRowFilter.java new file mode 100644 index 000000000..41c387740 --- /dev/null +++ b/main/src/com/google/refine/browsing/filters/ExpressionEqualRowFilter.java @@ -0,0 +1,164 @@ +package com.google.refine.browsing.filters; + +import java.util.Collection; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; + +import com.google.refine.browsing.RowFilter; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +/** + * Judge if a row matches by evaluating a given expression on the row, based 
on a particular + * column, and checking the result. It's a match if the result is any one of a given list of + * values, or if the result is blank or error and we want blank or error values. + */ +public class ExpressionEqualRowFilter implements RowFilter { + final protected Evaluable _evaluable; // the expression to evaluate + + final protected String _columnName; + final protected int _cellIndex; // the expression is based on this column; + // -1 if based on no column in particular, + // for expression such as "row.starred". + + final protected Object[] _matches; + final protected boolean _selectBlank; + final protected boolean _selectError; + final protected boolean _invert; + + public ExpressionEqualRowFilter( + Evaluable evaluable, + String columnName, + int cellIndex, + Object[] matches, + boolean selectBlank, + boolean selectError, + boolean invert + ) { + _evaluable = evaluable; + _columnName = columnName; + _cellIndex = cellIndex; + _matches = matches; + _selectBlank = selectBlank; + _selectError = selectError; + _invert = invert; + } + + public boolean filterRow(Project project, int rowIndex, Row row) { + return _invert ? + internalInvertedFilterRow(project, rowIndex, row) : + internalFilterRow(project, rowIndex, row); + } + + public boolean internalFilterRow(Project project, int rowIndex, Row row) { + Cell cell = _cellIndex < 0 ? 
null : row.getCell(_cellIndex); + + Properties bindings = ExpressionUtils.createBindings(project); + ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); + + Object value = _evaluable.evaluate(bindings); + if (value != null) { + if (value.getClass().isArray()) { + Object[] a = (Object[]) value; + for (Object v : a) { + if (testValue(v)) { + return true; + } + } + return false; + } else if (value instanceof Collection) { + for (Object v : ExpressionUtils.toObjectCollection(value)) { + if (testValue(v)) { + return true; + } + } + return false; + } else if (value instanceof JSONArray) { + JSONArray a = (JSONArray) value; + int l = a.length(); + + for (int i = 0; i < l; i++) { + try { + if (testValue(a.get(i))) { + return true; + } + } catch (JSONException e) { + // ignore + } + } + return false; + } // else, fall through + } + + return testValue(value); + } + + public boolean internalInvertedFilterRow(Project project, int rowIndex, Row row) { + Cell cell = _cellIndex < 0 ? null : row.getCell(_cellIndex); + + Properties bindings = ExpressionUtils.createBindings(project); + ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); + + Object value = _evaluable.evaluate(bindings); + if (value != null) { + if (value.getClass().isArray()) { + Object[] a = (Object[]) value; + for (Object v : a) { + if (testValue(v)) { + return false; + } + } + return true; + } else if (value instanceof Collection) { + for (Object v : ExpressionUtils.toObjectCollection(value)) { + if (testValue(v)) { + return false; + } + } + return true; + } else if (value instanceof JSONArray) { + JSONArray a = (JSONArray) value; + int l = a.length(); + + for (int i = 0; i < l; i++) { + try { + if (testValue(a.get(i))) { + return false; + } + } catch (JSONException e) { + // ignore + } + } + return true; + } // else, fall through + } + + return !testValue(value); + } + + protected boolean testValue(Object v) { + if (ExpressionUtils.isError(v)) { + return _selectError; + } else if 
(ExpressionUtils.isNonBlankData(v)) { + for (Object match : _matches) { + if (testValue(v, match)) { + return true; + } + } + return false; + } else { + return _selectBlank; + } + } + + protected boolean testValue(Object v, Object match) { + return (v instanceof Number && match instanceof Number) ? + ((Number) match).doubleValue() == ((Number) v).doubleValue() : + match.equals(v); + } +} diff --git a/main/src/com/google/refine/browsing/filters/ExpressionNumberComparisonRowFilter.java b/main/src/com/google/refine/browsing/filters/ExpressionNumberComparisonRowFilter.java new file mode 100644 index 000000000..0ee7f5a10 --- /dev/null +++ b/main/src/com/google/refine/browsing/filters/ExpressionNumberComparisonRowFilter.java @@ -0,0 +1,102 @@ +package com.google.refine.browsing.filters; + +import java.util.Collection; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; + +import com.google.refine.browsing.RowFilter; +import com.google.refine.browsing.util.RowEvaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +/** + * Judge if a row matches by evaluating a given expression on the row, based on a particular + * column, and checking the result. It's a match if the result satisfies some numeric comparisons, + * or if the result is non-numeric or blank or error and we want non-numeric or blank or error + * values. 
+ */ +abstract public class ExpressionNumberComparisonRowFilter implements RowFilter { + final protected RowEvaluable _rowEvaluable; + final protected boolean _selectNumeric; + final protected boolean _selectNonNumeric; + final protected boolean _selectBlank; + final protected boolean _selectError; + + public ExpressionNumberComparisonRowFilter( + RowEvaluable rowEvaluable, + boolean selectNumeric, + boolean selectNonNumeric, + boolean selectBlank, + boolean selectError + ) { + _rowEvaluable = rowEvaluable; + _selectNumeric = selectNumeric; + _selectNonNumeric = selectNonNumeric; + _selectBlank = selectBlank; + _selectError = selectError; + } + + public boolean filterRow(Project project, int rowIndex, Row row) { + Properties bindings = ExpressionUtils.createBindings(project); + + Object value = _rowEvaluable.eval(project, rowIndex, row, bindings); + if (value != null) { + if (value.getClass().isArray()) { + Object[] a = (Object[]) value; + for (Object v : a) { + if (checkValue(v)) { + return true; + } + } + return false; + } else if (value instanceof Collection) { + for (Object v : ExpressionUtils.toObjectCollection(value)) { + if (checkValue(v)) { + return true; + } + } + return false; + } else if (value instanceof JSONArray) { + JSONArray a = (JSONArray) value; + int l = a.length(); + + for (int i = 0; i < l; i++) { + try { + if (checkValue(a.get(i))) { + return true; + } + } catch (JSONException e) { + // ignore + } + } + return false; + } // else, fall through + } + + return checkValue(value); + } + + protected boolean checkValue(Object v) { + if (ExpressionUtils.isError(v)) { + return _selectError; + } else if (ExpressionUtils.isNonBlankData(v)) { + if (v instanceof Number) { + double d = ((Number) v).doubleValue(); + if (Double.isInfinite(d) || Double.isNaN(d)) { + return _selectError; + } else { + return _selectNumeric && checkValue(d); + } + } else { + return _selectNonNumeric; + } + } else { + return _selectBlank; + } + } + + abstract protected boolean 
checkValue(double d); +} diff --git a/main/src/com/google/refine/browsing/filters/ExpressionStringComparisonRowFilter.java b/main/src/com/google/refine/browsing/filters/ExpressionStringComparisonRowFilter.java new file mode 100644 index 000000000..092ae11a8 --- /dev/null +++ b/main/src/com/google/refine/browsing/filters/ExpressionStringComparisonRowFilter.java @@ -0,0 +1,77 @@ +package com.google.refine.browsing.filters; + +import java.util.Collection; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; + +import com.google.refine.browsing.RowFilter; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +/** + * Judge if a row matches by evaluating a given expression on the row, based on a particular + * column, and checking the result. It's a match if the result satisfies some string comparisons. + */ +abstract public class ExpressionStringComparisonRowFilter implements RowFilter { + final protected Evaluable _evaluable; + final protected String _columnName; + final protected int _cellIndex; + + public ExpressionStringComparisonRowFilter(Evaluable evaluable, String columnName, int cellIndex) { + _evaluable = evaluable; + _columnName = columnName; + _cellIndex = cellIndex; + } + + public boolean filterRow(Project project, int rowIndex, Row row) { + Cell cell = _cellIndex < 0 ? null : row.getCell(_cellIndex); + + Properties bindings = ExpressionUtils.createBindings(project); + ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); + + Object value = _evaluable.evaluate(bindings); + if (value != null) { + if (value.getClass().isArray()) { + Object[] a = (Object[]) value; + for (Object v : a) { + if (checkValue(v instanceof String ? 
((String) v) : v.toString())) { + return true; + } + } + } else if (value instanceof Collection) { + for (Object v : ExpressionUtils.toObjectCollection(value)) { + if (checkValue(v.toString())) { + return true; + } + } + return false; + } else if (value instanceof JSONArray) { + JSONArray a = (JSONArray) value; + int l = a.length(); + + for (int i = 0; i < l; i++) { + try { + if (checkValue(a.get(i).toString())) { + return true; + } + } catch (JSONException e) { + // ignore + } + } + return false; + } else { + if (checkValue(value instanceof String ? ((String) value) : value.toString())) { + return true; + } + } + } + return false; + } + + abstract protected boolean checkValue(String s); +} diff --git a/main/src/com/google/refine/browsing/filters/ExpressionTimeComparisonRowFilter.java b/main/src/com/google/refine/browsing/filters/ExpressionTimeComparisonRowFilter.java new file mode 100644 index 000000000..3bc987888 --- /dev/null +++ b/main/src/com/google/refine/browsing/filters/ExpressionTimeComparisonRowFilter.java @@ -0,0 +1,52 @@ +package com.google.refine.browsing.filters; + +import java.util.Date; + +import com.google.refine.browsing.util.RowEvaluable; +import com.google.refine.expr.ExpressionUtils; + +/** + * Judge if a row matches by evaluating a given expression on the row, based on a particular + * column, and checking the result. It's a match if the result satisfies some time comparisons, + * or if the result is not a time or blank or error and we want non-time or blank or error + * values. 
+ */ +abstract public class ExpressionTimeComparisonRowFilter extends ExpressionNumberComparisonRowFilter { + + final protected boolean _selectTime; + final protected boolean _selectNonTime; + + public ExpressionTimeComparisonRowFilter( + RowEvaluable rowEvaluable, + boolean selectTime, + boolean selectNonTime, + boolean selectBlank, + boolean selectError + ) { + super(rowEvaluable, selectTime, selectNonTime, selectBlank, selectError); + _selectTime = selectTime; + _selectNonTime = selectNonTime; + } + + protected boolean checkValue(Object v) { + if (ExpressionUtils.isError(v)) { + return _selectError; + } else if (ExpressionUtils.isNonBlankData(v)) { + if (v instanceof Date) { + long time = ((Date) v).getTime(); + return _selectTime && checkValue(time); + } else { + return _selectNonTime; + } + } else { + return _selectBlank; + } + } + + // not really needed for operation, just to make extending the abstract class possible + protected boolean checkValue(double d) { + return false; + } + + abstract protected boolean checkValue(long d); +} diff --git a/main/src/com/google/refine/browsing/util/ConjunctiveFilteredRecords.java b/main/src/com/google/refine/browsing/util/ConjunctiveFilteredRecords.java new file mode 100644 index 000000000..ff418868d --- /dev/null +++ b/main/src/com/google/refine/browsing/util/ConjunctiveFilteredRecords.java @@ -0,0 +1,49 @@ +package com.google.refine.browsing.util; + +import java.util.LinkedList; +import java.util.List; + +import com.google.refine.browsing.FilteredRecords; +import com.google.refine.browsing.RecordFilter; +import com.google.refine.browsing.RecordVisitor; +import com.google.refine.model.Project; +import com.google.refine.model.Record; + +/** + * Encapsulate logic for visiting records that match all given record filters. 
+ */ +public class ConjunctiveFilteredRecords implements FilteredRecords { + final protected List _recordFilters = new LinkedList(); + + public void add(RecordFilter recordFilter) { + _recordFilters.add(recordFilter); + } + + @Override + public void accept(Project project, RecordVisitor visitor) { + try { + visitor.start(project); + + int c = project.recordModel.getRecordCount(); + for (int r = 0; r < c; r++) { + Record record = project.recordModel.getRecord(r); + if (matchRecord(project, record)) { + if (visitor.visit(project, record)) { + return; + } + } + } + } finally { + visitor.end(project); + } + } + + protected boolean matchRecord(Project project, Record record) { + for (RecordFilter recordFilter : _recordFilters) { + if (!recordFilter.filterRecord(project, record)) { + return false; + } + } + return true; + } +} diff --git a/main/src/com/google/refine/browsing/util/ConjunctiveFilteredRows.java b/main/src/com/google/refine/browsing/util/ConjunctiveFilteredRows.java new file mode 100644 index 000000000..5c35e79fe --- /dev/null +++ b/main/src/com/google/refine/browsing/util/ConjunctiveFilteredRows.java @@ -0,0 +1,51 @@ +package com.google.refine.browsing.util; + +import java.util.LinkedList; +import java.util.List; + +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowFilter; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +/** + * Encapsulate logic for visiting rows that match all give row filters. Also visit + * context rows and dependent rows if configured so. 
+ */ +public class ConjunctiveFilteredRows implements FilteredRows { + final protected List _rowFilters = new LinkedList(); + + public void add(RowFilter rowFilter) { + _rowFilters.add(rowFilter); + } + + public void accept(Project project, RowVisitor visitor) { + try { + visitor.start(project); + + int c = project.rows.size(); + for (int rowIndex = 0; rowIndex < c; rowIndex++) { + Row row = project.rows.get(rowIndex); + if (matchRow(project, rowIndex, row)) { + visitRow(project, visitor, rowIndex, row); + } + } + } finally { + visitor.end(project); + } + } + + protected void visitRow(Project project, RowVisitor visitor, int rowIndex, Row row) { + visitor.visit(project, rowIndex, row); + } + + protected boolean matchRow(Project project, int rowIndex, Row row) { + for (RowFilter rowFilter : _rowFilters) { + if (!rowFilter.filterRow(project, rowIndex, row)) { + return false; + } + } + return true; + } +} diff --git a/main/src/com/google/refine/browsing/util/ExpressionBasedRowEvaluable.java b/main/src/com/google/refine/browsing/util/ExpressionBasedRowEvaluable.java new file mode 100644 index 000000000..897843a4d --- /dev/null +++ b/main/src/com/google/refine/browsing/util/ExpressionBasedRowEvaluable.java @@ -0,0 +1,34 @@ +package com.google.refine.browsing.util; + +import java.util.Properties; + +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class ExpressionBasedRowEvaluable implements RowEvaluable { + final protected String _columnName; + final protected int _cellIndex; + final protected Evaluable _eval; + + public ExpressionBasedRowEvaluable( + String columnName, int cellIndex, Evaluable eval) { + + _columnName = columnName; + _cellIndex = cellIndex; + _eval = eval; + } + + @Override + public Object eval( + Project project, int rowIndex, Row row, Properties bindings) { + + Cell cell = 
row.getCell(_cellIndex); + + ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); + + return _eval.evaluate(bindings); + } +} diff --git a/main/src/com/google/refine/browsing/util/ExpressionNominalValueGrouper.java b/main/src/com/google/refine/browsing/util/ExpressionNominalValueGrouper.java new file mode 100644 index 000000000..2b29dbcaf --- /dev/null +++ b/main/src/com/google/refine/browsing/util/ExpressionNominalValueGrouper.java @@ -0,0 +1,211 @@ +package com.google.refine.browsing.util; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import com.google.refine.browsing.DecoratedValue; +import com.google.refine.browsing.RecordVisitor; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.browsing.facets.NominalFacetChoice; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.model.Row; + +/** + * Visit matched rows or records and group them into facet choices based on the values computed + * from a given expression. 
+ */ +public class ExpressionNominalValueGrouper implements RowVisitor, RecordVisitor { + static public class IndexedNominalFacetChoice extends NominalFacetChoice { + int _latestIndex; + + public IndexedNominalFacetChoice(DecoratedValue decoratedValue, int latestIndex) { + super(decoratedValue); + _latestIndex = latestIndex; + } + } + + /* + * Configuration + */ + final protected Evaluable _evaluable; + final protected String _columnName; + final protected int _cellIndex; + + /* + * Computed results + */ + final public Map choices = new HashMap(); + public int blankCount = 0; + public int errorCount = 0; + + /* + * Scratch pad variables + */ + protected boolean hasBlank; + protected boolean hasError; + + public ExpressionNominalValueGrouper(Evaluable evaluable, String columnName, int cellIndex) { + _evaluable = evaluable; + _columnName = columnName; + _cellIndex = cellIndex; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + hasError = false; + hasBlank = false; + + Properties bindings = ExpressionUtils.createBindings(project); + + visitRow(project, rowIndex, row, bindings, rowIndex); + + if (hasError) { + errorCount++; + } + if (hasBlank) { + blankCount++; + } + + return false; + } + + @Override + public boolean visit(Project project, Record record) { + hasError = false; + hasBlank = false; + + Properties bindings = ExpressionUtils.createBindings(project); + + for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { + Row row = project.rows.get(r); + visitRow(project, r, row, bindings, record.recordIndex); + } + + if (hasError) { + errorCount++; + } + if (hasBlank) { + blankCount++; + } + + return false; + } + + protected void visitRow(Project project, int rowIndex, Row row, Properties bindings, int index) { + Object value = evalRow(project, rowIndex, row, bindings); + if (value != null) 
{ + if (value.getClass().isArray()) { + Object[] a = (Object[]) value; + for (Object v : a) { + processValue(v, rowIndex); + } + } else if (value instanceof Collection) { + for (Object v : ExpressionUtils.toObjectCollection(value)) { + processValue(v, rowIndex); + } + } else { + processValue(value, rowIndex); + } + } else { + processValue(value, rowIndex); + } + } + + protected Object evalRow(Project project, int rowIndex, Row row, Properties bindings) { + Cell cell = _cellIndex < 0 ? null : row.getCell(_cellIndex); + + ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); + + return _evaluable.evaluate(bindings); + } + + protected void processValue(Object value, int index) { + if (ExpressionUtils.isError(value)) { + hasError = true; + } else if (ExpressionUtils.isNonBlankData(value)) { + String valueString = value.toString(); + IndexedNominalFacetChoice facetChoice = choices.get(valueString); + + if (facetChoice != null) { + if (facetChoice._latestIndex < index) { + facetChoice._latestIndex = index; + facetChoice.count++; + } + } else { + String label = value.toString(); + DecoratedValue dValue = new DecoratedValue(value, label); + IndexedNominalFacetChoice choice = + new IndexedNominalFacetChoice(dValue, index); + + choice.count = 1; + choices.put(valueString, choice); + } + } else { + hasBlank = true; + } + } + + public RowEvaluable getChoiceCountRowEvaluable() { + return new RowEvaluable() { + @Override + public Object eval(Project project, int rowIndex, Row row, Properties bindings) { + Object value = evalRow(project, rowIndex, row, bindings); + return getChoiceValueCountMultiple(value); + } + + }; + } + + public Object getChoiceValueCountMultiple(Object value) { + if (value != null) { + if (value.getClass().isArray()) { + Object[] choiceValues = (Object[]) value; + List counts = new ArrayList(choiceValues.length); + + for (int i = 0; i < choiceValues.length; i++) { + counts.add(getChoiceValueCount(choiceValues[i])); + } + return counts; + } else 
if (value instanceof Collection) { + List choiceValues = ExpressionUtils.toObjectList(value); + List counts = new ArrayList(choiceValues.size()); + + int count = choiceValues.size(); + for (int i = 0; i < count; i++) { + counts.add(getChoiceValueCount(choiceValues.get(i))); + } + return counts; + } + } + + return getChoiceValueCount(value); + } + + public Integer getChoiceValueCount(Object choiceValue) { + if (ExpressionUtils.isError(choiceValue)) { + return errorCount; + } else if (ExpressionUtils.isNonBlankData(choiceValue)) { + IndexedNominalFacetChoice choice = choices.get(choiceValue); + return choice != null ? choice.count : 0; + } else { + return blankCount; + } + } +} diff --git a/main/src/com/google/refine/browsing/util/ExpressionNumericValueBinner.java b/main/src/com/google/refine/browsing/util/ExpressionNumericValueBinner.java new file mode 100644 index 000000000..0b1cc4d04 --- /dev/null +++ b/main/src/com/google/refine/browsing/util/ExpressionNumericValueBinner.java @@ -0,0 +1,148 @@ +package com.google.refine.browsing.util; + +import java.util.Collection; +import java.util.Properties; + +import com.google.refine.browsing.RecordVisitor; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.model.Row; + +/** + * Visit matched rows or records and slot them into bins based on the numbers computed + * from a given expression. 
+ */ +public class ExpressionNumericValueBinner implements RowVisitor, RecordVisitor { + /* + * Configuration + */ + final protected RowEvaluable _rowEvaluable; + final protected NumericBinIndex _index; // base bins + + /* + * Computed results + */ + final public int[] bins; + public int numericCount; + public int nonNumericCount; + public int blankCount; + public int errorCount; + + /* + * Scratchpad variables + */ + protected boolean hasError; + protected boolean hasBlank; + protected boolean hasNumeric; + protected boolean hasNonNumeric; + + public ExpressionNumericValueBinner(RowEvaluable rowEvaluable, NumericBinIndex index) { + _rowEvaluable = rowEvaluable; + _index = index; + bins = new int[_index.getBins().length]; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + resetFlags(); + + Properties bindings = ExpressionUtils.createBindings(project); + processRow(project, rowIndex, row, bindings); + + updateCounts(); + + return false; + } + + @Override + public boolean visit(Project project, Record record) { + resetFlags(); + + Properties bindings = ExpressionUtils.createBindings(project); + for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { + processRow(project, r, project.rows.get(r), bindings); + } + + updateCounts(); + + return false; + } + + protected void resetFlags() { + hasError = false; + hasBlank = false; + hasNumeric = false; + hasNonNumeric = false; + } + + protected void updateCounts() { + if (hasError) { + errorCount++; + } + if (hasBlank) { + blankCount++; + } + if (hasNumeric) { + numericCount++; + } + if (hasNonNumeric) { + nonNumericCount++; + } + } + + protected void processRow(Project project, int rowIndex, Row row, Properties bindings) { + Object value = _rowEvaluable.eval(project, rowIndex, row, bindings); + if (value != null) { + if 
(value.getClass().isArray()) { + Object[] a = (Object[]) value; + for (Object v : a) { + processValue(v); + } + return; + } else if (value instanceof Collection) { + for (Object v : ExpressionUtils.toObjectCollection(value)) { + processValue(v); + } + return; + } // else, fall through + } + + processValue(value); + } + + protected void processValue(Object value) { + if (ExpressionUtils.isError(value)) { + hasError = true; + } else if (ExpressionUtils.isNonBlankData(value)) { + if (value instanceof Number) { + double d = ((Number) value).doubleValue(); + if (!Double.isInfinite(d) && !Double.isNaN(d)) { + hasNumeric = true; + + int bin = (int) Math.floor((d - _index.getMin()) / _index.getStep()); + if (bin >= 0 && bin < bins.length) { // as a precaution + bins[bin]++; + } + } else { + hasError = true; + } + } else { + hasNonNumeric = true; + } + } else { + hasBlank = true; + } + } +} diff --git a/main/src/com/google/refine/browsing/util/ExpressionTimeValueBinner.java b/main/src/com/google/refine/browsing/util/ExpressionTimeValueBinner.java new file mode 100644 index 000000000..7dbeac3b7 --- /dev/null +++ b/main/src/com/google/refine/browsing/util/ExpressionTimeValueBinner.java @@ -0,0 +1,146 @@ +package com.google.refine.browsing.util; + +import java.util.Collection; +import java.util.Date; +import java.util.Properties; + +import com.google.refine.browsing.RecordVisitor; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.model.Row; + +/** + * Visit matched rows or records and slot them into bins based on the date computed + * from a given expression. 
+ */ +public class ExpressionTimeValueBinner implements RowVisitor, RecordVisitor { + + /* + * Configuration + */ + final protected RowEvaluable _rowEvaluable; + final protected TimeBinIndex _index; // base bins + + /* + * Computed results + */ + final public int[] bins; + public int timeCount; + public int nonTimeCount; + public int blankCount; + public int errorCount; + + /* + * Scratchpad variables + */ + protected boolean hasError; + protected boolean hasBlank; + protected boolean hasTime; + protected boolean hasNonTime; + + public ExpressionTimeValueBinner(RowEvaluable rowEvaluable, TimeBinIndex index) { + _rowEvaluable = rowEvaluable; + _index = index; + bins = new int[_index.getBins().length]; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + resetFlags(); + + Properties bindings = ExpressionUtils.createBindings(project); + processRow(project, rowIndex, row, bindings); + + updateCounts(); + + return false; + } + + @Override + public boolean visit(Project project, Record record) { + resetFlags(); + + Properties bindings = ExpressionUtils.createBindings(project); + for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { + processRow(project, r, project.rows.get(r), bindings); + } + + updateCounts(); + + return false; + } + + protected void resetFlags() { + hasError = false; + hasBlank = false; + hasTime = false; + hasNonTime = false; + } + + protected void updateCounts() { + if (hasError) { + errorCount++; + } + if (hasBlank) { + blankCount++; + } + if (hasTime) { + timeCount++; + } + if (hasNonTime) { + nonTimeCount++; + } + } + + protected void processRow(Project project, int rowIndex, Row row, Properties bindings) { + Object value = _rowEvaluable.eval(project, rowIndex, row, bindings); + if (value != null) { + if (value.getClass().isArray()) { + Object[] a = 
(Object[]) value; + for (Object v : a) { + processValue(v); + } + return; + } else if (value instanceof Collection) { + for (Object v : ExpressionUtils.toObjectCollection(value)) { + processValue(v); + } + return; + } // else, fall through + } + + processValue(value); + } + + protected void processValue(Object value) { + if (ExpressionUtils.isError(value)) { + hasError = true; + } else if (ExpressionUtils.isNonBlankData(value)) { + if (value instanceof Date) { + long t = ((Date) value).getTime(); + hasTime = true; + + int bin = (int) Math.floor((t - _index.getMin()) / _index.getStep()); + if (bin >= 0 && bin < bins.length) { // as a precaution + bins[bin]++; + } + } else { + hasNonTime = true; + } + } else { + hasBlank = true; + } + } +} diff --git a/main/src/com/google/refine/browsing/util/FilteredRecordsAsFilteredRows.java b/main/src/com/google/refine/browsing/util/FilteredRecordsAsFilteredRows.java new file mode 100644 index 000000000..27b762686 --- /dev/null +++ b/main/src/com/google/refine/browsing/util/FilteredRecordsAsFilteredRows.java @@ -0,0 +1,20 @@ +package com.google.refine.browsing.util; + +import com.google.refine.browsing.FilteredRecords; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.model.Project; + +public class FilteredRecordsAsFilteredRows implements FilteredRows { + final protected FilteredRecords _filteredRecords; + + public FilteredRecordsAsFilteredRows(FilteredRecords filteredRecords) { + _filteredRecords = filteredRecords; + } + + @Override + public void accept(Project project, RowVisitor visitor) { + _filteredRecords.accept(project, new RowVisitorAsRecordVisitor(visitor)); + } + +} diff --git a/main/src/com/google/refine/browsing/util/NumericBinIndex.java b/main/src/com/google/refine/browsing/util/NumericBinIndex.java new file mode 100644 index 000000000..59c4cf957 --- /dev/null +++ b/main/src/com/google/refine/browsing/util/NumericBinIndex.java @@ -0,0 +1,226 @@ 
+package com.google.refine.browsing.util; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Properties; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +/** + * A utility class for computing the base bins that form the base histograms of + * numeric range facets. It evaluates an expression on all the rows of a project to + * get numeric values, determines how many bins to distribute those values in, and + * bins the rows accordingly. + * + * This class processes all rows rather than just the filtered rows because it + * needs to compute the base bins of a numeric range facet, which remain unchanged + * as the user interacts with the facet. + */ +abstract public class NumericBinIndex { + + protected int _totalValueCount; + protected int _numbericValueCount; + protected double _min; + protected double _max; + protected double _step; + protected int[] _bins; + + protected int _numericRowCount; + protected int _nonNumericRowCount; + protected int _blankRowCount; + protected int _errorRowCount; + + protected boolean _hasError = false; + protected boolean _hasNonNumeric = false; + protected boolean _hasNumeric = false; + protected boolean _hasBlank = false; + + abstract protected void iterate(Project project, RowEvaluable rowEvaluable, List allValues); + + public NumericBinIndex(Project project, RowEvaluable rowEvaluable) { + _min = Double.POSITIVE_INFINITY; + _max = Double.NEGATIVE_INFINITY; + + List allValues = new ArrayList(); + + iterate(project, rowEvaluable, allValues); + + _numbericValueCount = allValues.size(); + + if (_min >= _max) { + _step = 1; + _min = Math.min(_min, _max); + _max = _step; + _bins = new int[1]; + + return; + } + + double diff = _max - _min; + + _step = 1; + if (diff > 10) { + while (_step * 100 < diff) { + _step *= 10; + } + } else { + while (_step * 100 > diff) { + _step /= 10; + } + } + + double originalMax 
= _max; + _min = (Math.floor(_min / _step) * _step); + _max = (Math.ceil(_max / _step) * _step); + + double binCount = (_max - _min) / _step; + if (binCount > 100) { + _step *= 2; + binCount = (binCount + 1) / 2; + } + + if (_max <= originalMax) { + _max += _step; + binCount++; + } + + _bins = new int[(int) Math.round(binCount)]; + for (double d : allValues) { + int bin = Math.max((int) Math.floor((d - _min) / _step),0); + _bins[bin]++; + } + } + + public boolean isNumeric() { + return _numbericValueCount > _totalValueCount / 2; + } + + public double getMin() { + return _min; + } + + public double getMax() { + return _max; + } + + public double getStep() { + return _step; + } + + public int[] getBins() { + return _bins; + } + + public int getNumericRowCount() { + return _numericRowCount; + } + + public int getNonNumericRowCount() { + return _nonNumericRowCount; + } + + public int getBlankRowCount() { + return _blankRowCount; + } + + public int getErrorRowCount() { + return _errorRowCount; + } + + protected void processRow( + Project project, + RowEvaluable rowEvaluable, + List allValues, + int rowIndex, + Row row, + Properties bindings + ) { + Object value = rowEvaluable.eval(project, rowIndex, row, bindings); + + if (ExpressionUtils.isError(value)) { + _hasError = true; + } else if (ExpressionUtils.isNonBlankData(value)) { + if (value.getClass().isArray()) { + Object[] a = (Object[]) value; + for (Object v : a) { + _totalValueCount++; + + if (ExpressionUtils.isError(v)) { + _hasError = true; + } else if (ExpressionUtils.isNonBlankData(v)) { + if (v instanceof Number) { + _hasNumeric = true; + processValue(((Number) v).doubleValue(), allValues); + } else { + _hasNonNumeric = true; + } + } else { + _hasBlank = true; + } + } + } else if (value instanceof Collection) { + for (Object v : ExpressionUtils.toObjectCollection(value)) { + _totalValueCount++; + + if (ExpressionUtils.isError(v)) { + _hasError = true; + } else if (ExpressionUtils.isNonBlankData(v)) { + if (v 
instanceof Number) { + _hasNumeric = true; + processValue(((Number) v).doubleValue(), allValues); + } else { + _hasNonNumeric = true; + } + } else { + _hasBlank = true; + } + } + } else { + _totalValueCount++; + + if (value instanceof Number) { + _hasNumeric = true; + processValue(((Number) value).doubleValue(), allValues); + } else { + _hasNonNumeric = true; + } + } + } else { + _hasBlank = true; + } + } + + protected void preprocessing() { + _hasBlank = false; + _hasError = false; + _hasNonNumeric = false; + _hasNumeric = false; + } + + protected void postprocessing() { + if (_hasError) { + _errorRowCount++; + } + if (_hasBlank) { + _blankRowCount++; + } + if (_hasNumeric) { + _numericRowCount++; + } + if (_hasNonNumeric) { + _nonNumericRowCount++; + } + } + + protected void processValue(double v, List allValues) { + if (!Double.isInfinite(v) && !Double.isNaN(v)) { + _min = Math.min(_min, v); + _max = Math.max(_max, v); + allValues.add(v); + } + } + +} diff --git a/main/src/com/google/refine/browsing/util/NumericBinRecordIndex.java b/main/src/com/google/refine/browsing/util/NumericBinRecordIndex.java new file mode 100644 index 000000000..763e73817 --- /dev/null +++ b/main/src/com/google/refine/browsing/util/NumericBinRecordIndex.java @@ -0,0 +1,38 @@ +package com.google.refine.browsing.util; + +import java.util.List; +import java.util.Properties; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.model.Row; + +public class NumericBinRecordIndex extends NumericBinIndex { + public NumericBinRecordIndex(Project project, RowEvaluable rowEvaluable) { + super(project, rowEvaluable); + } + + @Override + protected void iterate( + Project project, RowEvaluable rowEvaluable, List allValues) { + + Properties bindings = ExpressionUtils.createBindings(project); + int count = project.recordModel.getRecordCount(); + + for (int r = 0; r < count; r++) { + Record record = 
project.recordModel.getRecord(r); + + preprocessing(); + + for (int i = record.fromRowIndex; i < record.toRowIndex; i++) { + Row row = project.rows.get(i); + + processRow(project, rowEvaluable, allValues, i, row, bindings); + } + + postprocessing(); + } + } + +} diff --git a/main/src/com/google/refine/browsing/util/NumericBinRowIndex.java b/main/src/com/google/refine/browsing/util/NumericBinRowIndex.java new file mode 100644 index 000000000..c67493793 --- /dev/null +++ b/main/src/com/google/refine/browsing/util/NumericBinRowIndex.java @@ -0,0 +1,33 @@ +package com.google.refine.browsing.util; + +import java.util.List; +import java.util.Properties; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class NumericBinRowIndex extends NumericBinIndex { + public NumericBinRowIndex(Project project, RowEvaluable rowEvaluable) { + + super(project, rowEvaluable); + } + + @Override + protected void iterate( + Project project, RowEvaluable rowEvaluable, List allValues) { + + Properties bindings = ExpressionUtils.createBindings(project); + + for (int i = 0; i < project.rows.size(); i++) { + Row row = project.rows.get(i); + + preprocessing(); + + processRow(project, rowEvaluable, allValues, i, row, bindings); + + postprocessing(); + } + } + +} diff --git a/main/src/com/google/refine/browsing/util/RowEvaluable.java b/main/src/com/google/refine/browsing/util/RowEvaluable.java new file mode 100644 index 000000000..aeaa61459 --- /dev/null +++ b/main/src/com/google/refine/browsing/util/RowEvaluable.java @@ -0,0 +1,10 @@ +package com.google.refine.browsing.util; + +import java.util.Properties; + +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public interface RowEvaluable { + public Object eval(Project project, int rowIndex, Row row, Properties bindings); +} diff --git a/main/src/com/google/refine/browsing/util/RowVisitorAsRecordVisitor.java 
b/main/src/com/google/refine/browsing/util/RowVisitorAsRecordVisitor.java new file mode 100644 index 000000000..66a401a8c --- /dev/null +++ b/main/src/com/google/refine/browsing/util/RowVisitorAsRecordVisitor.java @@ -0,0 +1,34 @@ +package com.google.refine.browsing.util; + +import com.google.refine.browsing.RecordVisitor; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.model.Project; +import com.google.refine.model.Record; + +public class RowVisitorAsRecordVisitor implements RecordVisitor { + final protected RowVisitor _rowVisitor; + + public RowVisitorAsRecordVisitor(RowVisitor rowVisitor) { + _rowVisitor = rowVisitor; + } + + @Override + public void start(Project project) { + _rowVisitor.start(project); + } + + @Override + public void end(Project project) { + _rowVisitor.end(project); + } + + @Override + public boolean visit(Project project, Record record) { + for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { + if (_rowVisitor.visit(project, r, project.rows.get(r))) { + return true; + } + } + return false; + } +} diff --git a/main/src/com/google/refine/browsing/util/TimeBinIndex.java b/main/src/com/google/refine/browsing/util/TimeBinIndex.java new file mode 100644 index 000000000..60c157dc6 --- /dev/null +++ b/main/src/com/google/refine/browsing/util/TimeBinIndex.java @@ -0,0 +1,218 @@ +package com.google.refine.browsing.util; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Date; +import java.util.List; +import java.util.Properties; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +/** + * A utility class for computing the base bins that form the base histograms of + * temporal range facets. It evaluates an expression on all the rows of a project to + * get temporal values, determines how many bins to distribute those values in, and + * bins the rows accordingly. 
+ * + * This class processes all rows rather than just the filtered rows because it + * needs to compute the base bins of a temporal range facet, which remain unchanged + * as the user interacts with the facet. + */ +abstract public class TimeBinIndex { + + protected int _totalValueCount; + protected int _timeValueCount; + protected long _min; + protected long _max; + protected long _step; + protected int[] _bins; + + protected int _timeRowCount; + protected int _nonTimeRowCount; + protected int _blankRowCount; + protected int _errorRowCount; + + protected boolean _hasError = false; + protected boolean _hasNonTime = false; + protected boolean _hasTime = false; + protected boolean _hasBlank = false; + + protected long[] steps = { // candidate bin widths in milliseconds, smallest first + 1, // msec + 1000, // sec + 1000*60, // min + 1000*60*60, // hour + 1000*60*60*24, // day + 1000*60*60*24*7, // week + 1000L*2629746, // month (average Gregorian year / 12); long literal because the int product overflows + 1000L*31556952, // year (average Gregorian year); long literal because the int product overflows + 1000L*31556952*10, // decade + 1000L*31556952*100, // century + 1000L*31556952*1000, // millennium + }; + + abstract protected void iterate(Project project, RowEvaluable rowEvaluable, List allValues); + + public TimeBinIndex(Project project, RowEvaluable rowEvaluable) { + _min = Long.MAX_VALUE; + _max = Long.MIN_VALUE; + + List allValues = new ArrayList(); + + iterate(project, rowEvaluable, allValues); + + _timeValueCount = allValues.size(); + + if (_min >= _max) { + _step = 1; + _min = Math.min(_min, _max); + _max = _step; + _bins = new int[1]; + + return; + } + + long diff = _max - _min; + + for (int i = 0; i < steps.length; i++) { // take the smallest step whose quotient is at most 100; otherwise the largest step stays selected + _step = steps[i]; + if (diff / _step <= 100) break; + } + + _bins = new int[(int) (diff / _step) + 1]; + for (long d : allValues) { + int bin = (int) Math.max((d - _min) / _step,0); + _bins[bin]++; + } + } + + public boolean isTemporal() { + return _timeValueCount > _totalValueCount / 2; + } + + public long getMin() { + return _min; + } + + public long getMax() { + return _max; + } + +
public long getStep() { + return _step; + } + + public int[] getBins() { + return _bins; + } + + public int getTimeRowCount() { + return _timeRowCount; + } + + public int getNonTimeRowCount() { + return _nonTimeRowCount; + } + + public int getBlankRowCount() { + return _blankRowCount; + } + + public int getErrorRowCount() { + return _errorRowCount; + } + + protected void processRow( + Project project, + RowEvaluable rowEvaluable, + List allValues, + int rowIndex, + Row row, + Properties bindings + ) { + Object value = rowEvaluable.eval(project, rowIndex, row, bindings); + + if (ExpressionUtils.isError(value)) { + _hasError = true; + } else if (ExpressionUtils.isNonBlankData(value)) { + if (value.getClass().isArray()) { + Object[] a = (Object[]) value; + for (Object v : a) { + _totalValueCount++; + + if (ExpressionUtils.isError(v)) { + _hasError = true; + } else if (ExpressionUtils.isNonBlankData(v)) { + if (v instanceof Date) { + _hasTime = true; + processValue(((Date) v).getTime(), allValues); + } else { + _hasNonTime = true; + } + } else { + _hasBlank = true; + } + } + } else if (value instanceof Collection) { + for (Object v : ExpressionUtils.toObjectCollection(value)) { + _totalValueCount++; + + if (ExpressionUtils.isError(v)) { + _hasError = true; + } else if (ExpressionUtils.isNonBlankData(v)) { + if (v instanceof Date) { + _hasTime = true; + processValue(((Date) v).getTime(), allValues); + } else { + _hasNonTime = true; + } + } else { + _hasBlank = true; + } + } + } else { + _totalValueCount++; + + if (value instanceof Date) { + _hasTime = true; + processValue(((Date) value).getTime(), allValues); + } else { + _hasNonTime = true; + } + } + } else { + _hasBlank = true; + } + } + + protected void preprocessing() { + _hasBlank = false; + _hasError = false; + _hasNonTime = false; + _hasTime = false; + } + + protected void postprocessing() { + if (_hasError) { + _errorRowCount++; + } + if (_hasBlank) { + _blankRowCount++; + } + if (_hasTime) { + _timeRowCount++; 
+ } + if (_hasNonTime) { + _nonTimeRowCount++; + } + } + + protected void processValue(long v, List allValues) { + _min = Math.min(_min, v); + _max = Math.max(_max, v); + allValues.add(v); + } + +} diff --git a/main/src/com/google/refine/browsing/util/TimeBinRecordIndex.java b/main/src/com/google/refine/browsing/util/TimeBinRecordIndex.java new file mode 100644 index 000000000..9020a91f1 --- /dev/null +++ b/main/src/com/google/refine/browsing/util/TimeBinRecordIndex.java @@ -0,0 +1,38 @@ +package com.google.refine.browsing.util; + +import java.util.List; +import java.util.Properties; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.model.Row; + +public class TimeBinRecordIndex extends TimeBinIndex { + + public TimeBinRecordIndex(Project project, RowEvaluable rowEvaluable) { + super(project, rowEvaluable); + } + + @Override + protected void iterate(Project project, RowEvaluable rowEvaluable, List allValues) { + + Properties bindings = ExpressionUtils.createBindings(project); + int count = project.recordModel.getRecordCount(); + + for (int r = 0; r < count; r++) { + Record record = project.recordModel.getRecord(r); + + preprocessing(); + + for (int i = record.fromRowIndex; i < record.toRowIndex; i++) { + Row row = project.rows.get(i); + + processRow(project, rowEvaluable, allValues, i, row, bindings); + } + + postprocessing(); + } + } + +} diff --git a/main/src/com/google/refine/browsing/util/TimeBinRowIndex.java b/main/src/com/google/refine/browsing/util/TimeBinRowIndex.java new file mode 100644 index 000000000..2d9394094 --- /dev/null +++ b/main/src/com/google/refine/browsing/util/TimeBinRowIndex.java @@ -0,0 +1,32 @@ +package com.google.refine.browsing.util; + +import java.util.List; +import java.util.Properties; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class 
TimeBinRowIndex extends TimeBinIndex { + + public TimeBinRowIndex(Project project, RowEvaluable rowEvaluable) { + super(project, rowEvaluable); + } + + @Override + protected void iterate(Project project, RowEvaluable rowEvaluable, List allValues) { + + Properties bindings = ExpressionUtils.createBindings(project); + + for (int i = 0; i < project.rows.size(); i++) { + Row row = project.rows.get(i); + + preprocessing(); + + processRow(project, rowEvaluable, allValues, i, row, bindings); + + postprocessing(); + } + } + +} diff --git a/main/src/com/google/refine/clustering/Clusterer.java b/main/src/com/google/refine/clustering/Clusterer.java new file mode 100644 index 000000000..13b7067a9 --- /dev/null +++ b/main/src/com/google/refine/clustering/Clusterer.java @@ -0,0 +1,29 @@ +package com.google.refine.clustering; + +import org.json.JSONObject; + +import com.google.refine.Jsonizable; +import com.google.refine.browsing.Engine; +import com.google.refine.model.Column; +import com.google.refine.model.Project; + +public abstract class Clusterer implements Jsonizable { + + protected Project _project; + protected int _colindex; + protected JSONObject _config; + + public abstract void computeClusters(Engine engine); + + public void initializeFromJSON(Project project, JSONObject o) throws Exception { + _project = project; + _config = o; + + String colname = o.getString("column"); + for (Column column : project.columnModel.columns) { + if (column.getName().equals(colname)) { + _colindex = column.getCellIndex(); + } + } + } +} diff --git a/main/src/com/google/refine/clustering/binning/BinningClusterer.java b/main/src/com/google/refine/clustering/binning/BinningClusterer.java new file mode 100644 index 000000000..bac0593ba --- /dev/null +++ b/main/src/com/google/refine/clustering/binning/BinningClusterer.java @@ -0,0 +1,169 @@ +package com.google.refine.clustering.binning; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import 
java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.TreeMap; +import java.util.Map.Entry; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.clustering.Clusterer; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class BinningClusterer extends Clusterer { + + private Keyer _keyer; + + static final protected Map _keyers = new HashMap(); + + final static Logger logger = LoggerFactory.getLogger("binning_clusterer"); + + List> _clusters; + + static { + _keyers.put("fingerprint", new FingerprintKeyer()); + _keyers.put("ngram-fingerprint", new NGramFingerprintKeyer()); + _keyers.put("metaphone", new MetaphoneKeyer()); + _keyers.put("double-metaphone", new DoubleMetaphoneKeyer()); + _keyers.put("soundex", new SoundexKeyer()); + } + + class BinningRowVisitor implements RowVisitor { + + Keyer _keyer; + Object[] _params; + JSONObject _config; + + Map> _map = new HashMap>(); + + public BinningRowVisitor(Keyer k, JSONObject o) { + _keyer = k; + _config = o; + if (k instanceof NGramFingerprintKeyer) { + try { + int size = _config.getJSONObject("params").getInt("ngram-size"); + logger.debug("Using ngram size: {}", size); + _params = new Object[1]; + _params[0] = size; + } catch (JSONException e) { + //Gridworks.warn("No params specified, using default"); + } + } + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(_colindex); + if (cell != null && cell.value != 
null) { + Object v = cell.value; + String s = (v instanceof String) ? ((String) v) : v.toString(); + String key = _keyer.key(s,_params); + if (_map.containsKey(key)) { + Map m = _map.get(key); + if (m.containsKey(s)) { + m.put(s, m.get(s) + 1); + } else { + m.put(s,1); + } + } else { + Map m = new TreeMap(); + m.put(s,1); + _map.put(key, m); + } + } + return false; + } + + public Map> getMap() { + return _map; + } + } + + public static class SizeComparator implements Comparator>, Serializable { // orders clusters by number of distinct values, largest first + private static final long serialVersionUID = -1390696157208674054L; + public int compare(Map o1, Map o2) { + int s1 = o1.size(); + int s2 = o2.size(); + if (s1 == s2) { // same number of distinct values: break the tie by total occurrences, largest first + int total1 = 0; + for (int i : o1.values()) { + total1 += i; + } + int total2 = 0; + for (int i : o2.values()) { + total2 += i; + } + return total2 - total1; + } else { + return s2 - s1; + } + } + } + + public static class EntriesComparator implements Comparator>, Serializable { + private static final long serialVersionUID = 2763378036791777964L; + public int compare(Entry o1, Entry o2) { + return o2.getValue() - o1.getValue(); + } + } + + public void initializeFromJSON(Project project, JSONObject o) throws Exception { + super.initializeFromJSON(project, o); + _keyer = _keyers.get(o.getString("function").toLowerCase()); + } + + public void computeClusters(Engine engine) { + BinningRowVisitor visitor = new BinningRowVisitor(_keyer,_config); + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(_project, visitor); + + Map> map = visitor.getMap(); + _clusters = new ArrayList>(map.values()); + Collections.sort(_clusters, new SizeComparator()); + } + + public void write(JSONWriter writer, Properties options) throws JSONException { + EntriesComparator c = new EntriesComparator(); + + writer.array(); + for (Map m : _clusters) { + if (m.size() > 1) { + writer.array(); + List> entries = new ArrayList>(m.entrySet()); + Collections.sort(entries,c); + for (Entry e : entries) { +
writer.object(); + writer.key("v"); writer.value(e.getKey()); + writer.key("c"); writer.value(e.getValue()); + writer.endObject(); + } + writer.endArray(); + } + } + writer.endArray(); + } +} diff --git a/main/src/com/google/refine/clustering/binning/DoubleMetaphoneKeyer.java b/main/src/com/google/refine/clustering/binning/DoubleMetaphoneKeyer.java new file mode 100644 index 000000000..1166adb62 --- /dev/null +++ b/main/src/com/google/refine/clustering/binning/DoubleMetaphoneKeyer.java @@ -0,0 +1,18 @@ +package com.google.refine.clustering.binning; + +import org.apache.commons.codec.language.DoubleMetaphone; + +public class DoubleMetaphoneKeyer extends Keyer { + + private DoubleMetaphone _metaphone2; + + public DoubleMetaphoneKeyer() { + _metaphone2 = new DoubleMetaphone(); + _metaphone2.setMaxCodeLen(2000); + } + + public String key(String s, Object... o) { + return _metaphone2.doubleMetaphone(s); + } + +} diff --git a/main/src/com/google/refine/clustering/binning/FingerprintKeyer.java b/main/src/com/google/refine/clustering/binning/FingerprintKeyer.java new file mode 100644 index 000000000..df8fc87c0 --- /dev/null +++ b/main/src/com/google/refine/clustering/binning/FingerprintKeyer.java @@ -0,0 +1,249 @@ +package com.google.refine.clustering.binning; + +import java.util.Iterator; +import java.util.TreeSet; +import java.util.regex.Pattern; + +import org.apache.commons.lang.StringUtils; + +public class FingerprintKeyer extends Keyer { + + static final Pattern alphanum = Pattern.compile("\\p{Punct}|\\p{Cntrl}"); + + public String key(String s, Object... 
o) { + s = s.trim(); // first off, remove whitespace around the string + s = s.toLowerCase(); // then lowercase it + s = alphanum.matcher(s).replaceAll(""); // then remove all punctuation and control chars + String[] frags = StringUtils.split(s); // split by whitespace + TreeSet set = new TreeSet(); + for (String ss : frags) { + set.add(ss); // order fragments and dedupe + } + StringBuffer b = new StringBuffer(); + Iterator i = set.iterator(); + while (i.hasNext()) { // join ordered fragments back together + b.append(i.next()); + b.append(' '); + } + return asciify(b.toString()); // find ASCII equivalent to characters + } + + protected String asciify(String s) { + char[] c = s.toCharArray(); + StringBuffer b = new StringBuffer(); + for (int i = 0; i < c.length; i++) { + b.append(translate(c[i])); + } + return b.toString(); + } + + /** + * Translate the given unicode char in the closest ASCII representation + * NOTE: this function deals only with latin-1 supplement and latin-1 extended code charts + */ + private char translate(char c) { + switch(c) { + case '\u00C0': + case '\u00C1': + case '\u00C2': + case '\u00C3': + case '\u00C4': + case '\u00C5': + case '\u00E0': + case '\u00E1': + case '\u00E2': + case '\u00E3': + case '\u00E4': + case '\u00E5': + case '\u0100': + case '\u0101': + case '\u0102': + case '\u0103': + case '\u0104': + case '\u0105': + return 'a'; + case '\u00C7': + case '\u00E7': + case '\u0106': + case '\u0107': + case '\u0108': + case '\u0109': + case '\u010A': + case '\u010B': + case '\u010C': + case '\u010D': + return 'c'; + case '\u00D0': + case '\u00F0': + case '\u010E': + case '\u010F': + case '\u0110': + case '\u0111': + return 'd'; + case '\u00C8': + case '\u00C9': + case '\u00CA': + case '\u00CB': + case '\u00E8': + case '\u00E9': + case '\u00EA': + case '\u00EB': + case '\u0112': + case '\u0113': + case '\u0114': + case '\u0115': + case '\u0116': + case '\u0117': + case '\u0118': + case '\u0119': + case '\u011A': + case '\u011B': + 
return 'e'; + case '\u011C': + case '\u011D': + case '\u011E': + case '\u011F': + case '\u0120': + case '\u0121': + case '\u0122': + case '\u0123': + return 'g'; + case '\u0124': + case '\u0125': + case '\u0126': + case '\u0127': + return 'h'; + case '\u00CC': + case '\u00CD': + case '\u00CE': + case '\u00CF': + case '\u00EC': + case '\u00ED': + case '\u00EE': + case '\u00EF': + case '\u0128': + case '\u0129': + case '\u012A': + case '\u012B': + case '\u012C': + case '\u012D': + case '\u012E': + case '\u012F': + case '\u0130': + case '\u0131': + return 'i'; + case '\u0134': + case '\u0135': + return 'j'; + case '\u0136': + case '\u0137': + case '\u0138': + return 'k'; + case '\u0139': + case '\u013A': + case '\u013B': + case '\u013C': + case '\u013D': + case '\u013E': + case '\u013F': + case '\u0140': + case '\u0141': + case '\u0142': + return 'l'; + case '\u00D1': + case '\u00F1': + case '\u0143': + case '\u0144': + case '\u0145': + case '\u0146': + case '\u0147': + case '\u0148': + case '\u0149': + case '\u014A': + case '\u014B': + return 'n'; + case '\u00D2': + case '\u00D3': + case '\u00D4': + case '\u00D5': + case '\u00D6': + case '\u00D8': + case '\u00F2': + case '\u00F3': + case '\u00F4': + case '\u00F5': + case '\u00F6': + case '\u00F8': + case '\u014C': + case '\u014D': + case '\u014E': + case '\u014F': + case '\u0150': + case '\u0151': + return 'o'; + case '\u0154': + case '\u0155': + case '\u0156': + case '\u0157': + case '\u0158': + case '\u0159': + return 'r'; + case '\u015A': + case '\u015B': + case '\u015C': + case '\u015D': + case '\u015E': + case '\u015F': + case '\u0160': + case '\u0161': + case '\u017F': + return 's'; + case '\u0162': + case '\u0163': + case '\u0164': + case '\u0165': + case '\u0166': + case '\u0167': + return 't'; + case '\u00D9': + case '\u00DA': + case '\u00DB': + case '\u00DC': + case '\u00F9': + case '\u00FA': + case '\u00FB': + case '\u00FC': + case '\u0168': + case '\u0169': + case '\u016A': + case '\u016B': + case 
'\u016C': + case '\u016D': + case '\u016E': + case '\u016F': + case '\u0170': + case '\u0171': + case '\u0172': + case '\u0173': + return 'u'; + case '\u0174': + case '\u0175': + return 'w'; + case '\u00DD': + case '\u00FD': + case '\u00FF': + case '\u0176': + case '\u0177': + case '\u0178': + return 'y'; + case '\u0179': + case '\u017A': + case '\u017B': + case '\u017C': + case '\u017D': + case '\u017E': + return 'z'; + } + return c; + } +} diff --git a/main/src/com/google/refine/clustering/binning/Keyer.java b/main/src/com/google/refine/clustering/binning/Keyer.java new file mode 100644 index 000000000..46c0eb1e8 --- /dev/null +++ b/main/src/com/google/refine/clustering/binning/Keyer.java @@ -0,0 +1,12 @@ +package com.google.refine.clustering.binning; + + +public abstract class Keyer { + + public String key(String s) { + return this.key(s, (Object[]) null); + } + + public abstract String key(String string, Object... params); + +} diff --git a/main/src/com/google/refine/clustering/binning/MetaphoneKeyer.java b/main/src/com/google/refine/clustering/binning/MetaphoneKeyer.java new file mode 100644 index 000000000..f532e8031 --- /dev/null +++ b/main/src/com/google/refine/clustering/binning/MetaphoneKeyer.java @@ -0,0 +1,18 @@ +package com.google.refine.clustering.binning; + +import org.apache.commons.codec.language.Metaphone; + +public class MetaphoneKeyer extends Keyer { + + private Metaphone _metaphone; + + public MetaphoneKeyer() { + _metaphone = new Metaphone(); + _metaphone.setMaxCodeLen(2000); + } + + public String key(String s, Object... 
o) { + return _metaphone.metaphone(s); + } + +} diff --git a/main/src/com/google/refine/clustering/binning/NGramFingerprintKeyer.java b/main/src/com/google/refine/clustering/binning/NGramFingerprintKeyer.java new file mode 100644 index 000000000..3b493d279 --- /dev/null +++ b/main/src/com/google/refine/clustering/binning/NGramFingerprintKeyer.java @@ -0,0 +1,35 @@ +package com.google.refine.clustering.binning; + +import java.util.Iterator; +import java.util.TreeSet; +import java.util.regex.Pattern; + +public class NGramFingerprintKeyer extends FingerprintKeyer { + + static final Pattern alphanum = Pattern.compile("\\p{Punct}|\\p{Cntrl}|\\p{Space}"); + + public String key(String s, Object... o) { + int ngram_size = 2; + if (o != null && o.length > 0 && o[0] instanceof Number) { + ngram_size = (Integer) o[0]; + } + s = s.toLowerCase(); // then lowercase it + s = alphanum.matcher(s).replaceAll(""); // then remove all punctuation and control chars + TreeSet set = ngram_split(s,ngram_size); + StringBuffer b = new StringBuffer(); + Iterator i = set.iterator(); + while (i.hasNext()) { // join ordered fragments back together + b.append(i.next()); + } + return asciify(b.toString()); // find ASCII equivalent to characters + } + + protected TreeSet ngram_split(String s, int size) { + TreeSet set = new TreeSet(); + char[] chars = s.toCharArray(); + for (int i = 0; i + size <= chars.length; i++) { + set.add(new String(chars,i,size)); + } + return set; + } +} diff --git a/main/src/com/google/refine/clustering/binning/SoundexKeyer.java b/main/src/com/google/refine/clustering/binning/SoundexKeyer.java new file mode 100644 index 000000000..4a53f1fa0 --- /dev/null +++ b/main/src/com/google/refine/clustering/binning/SoundexKeyer.java @@ -0,0 +1,17 @@ +package com.google.refine.clustering.binning; + +import org.apache.commons.codec.language.Soundex; + +public class SoundexKeyer extends Keyer { + + private Soundex _soundex; + + public SoundexKeyer() { + _soundex = new Soundex(); + } + 
+ public String key(String s, Object... o) { + return _soundex.soundex(s); + } + +} diff --git a/main/src/com/google/refine/clustering/knn/kNNClusterer.java b/main/src/com/google/refine/clustering/knn/kNNClusterer.java new file mode 100644 index 000000000..005c71158 --- /dev/null +++ b/main/src/com/google/refine/clustering/knn/kNNClusterer.java @@ -0,0 +1,211 @@ +package com.google.refine.clustering.knn; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.Map.Entry; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.clustering.Clusterer; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +import edu.mit.simile.vicino.clustering.NGramClusterer; +import edu.mit.simile.vicino.clustering.VPTreeClusterer; +import edu.mit.simile.vicino.distances.BZip2Distance; +import edu.mit.simile.vicino.distances.Distance; +import edu.mit.simile.vicino.distances.GZipDistance; +import edu.mit.simile.vicino.distances.JaccardDistance; +import edu.mit.simile.vicino.distances.JaroDistance; +import edu.mit.simile.vicino.distances.JaroWinklerDistance; +import edu.mit.simile.vicino.distances.JaroWinklerTFIDFDistance; +import edu.mit.simile.vicino.distances.LevenshteinDistance; +import edu.mit.simile.vicino.distances.PPMDistance; + +public class kNNClusterer extends Clusterer { + + private Distance _distance; + + static final protected Map _distances = new HashMap(); + + List> _clusters; + + Map _counts = new HashMap(); + + 
final static Logger logger = LoggerFactory.getLogger("kNN_clusterer"); + + static { + _distances.put("levenshtein", new LevenshteinDistance()); + _distances.put("jaccard", new JaccardDistance()); + _distances.put("jaro", new JaroDistance()); + _distances.put("jaro-winkler", new JaroWinklerDistance()); + _distances.put("jaro-winkler-tfidf", new JaroWinklerTFIDFDistance()); + _distances.put("gzip", new GZipDistance()); + _distances.put("bzip2", new BZip2Distance()); + _distances.put("ppm", new PPMDistance()); + } + + class VPTreeClusteringRowVisitor implements RowVisitor { + + Distance _distance; + JSONObject _config; + VPTreeClusterer _clusterer; + double _radius = 1.0f; + + public VPTreeClusteringRowVisitor(Distance d, JSONObject o) { + _distance = d; + _config = o; + _clusterer = new VPTreeClusterer(_distance); + try { + JSONObject params = o.getJSONObject("params"); + _radius = params.getDouble("radius"); + } catch (JSONException e) { + //Gridworks.warn("No parameters found, using defaults"); + } + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(_colindex); + if (cell != null && cell.value != null) { + Object v = cell.value; + String s = (v instanceof String) ? 
((String) v) : v.toString(); + _clusterer.populate(s); + count(s); + } + return false; + } + + public List> getClusters() { + return _clusterer.getClusters(_radius); + } + } + + class BlockingClusteringRowVisitor implements RowVisitor { + + Distance _distance; + JSONObject _config; + double _radius = 1.0d; + int _blockingNgramSize = 6; + HashSet _data; + NGramClusterer _clusterer; + + public BlockingClusteringRowVisitor(Distance d, JSONObject o) { + _distance = d; + _config = o; + _data = new HashSet(); + try { + JSONObject params = o.getJSONObject("params"); + _radius = params.getDouble("radius"); + logger.debug("Use radius: {}", _radius); + _blockingNgramSize = params.getInt("blocking-ngram-size"); + logger.debug("Use blocking ngram size: {}",_blockingNgramSize); + } catch (JSONException e) { + logger.debug("No parameters found, using defaults"); + } + _clusterer = new NGramClusterer(_distance, _blockingNgramSize); + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(_colindex); + if (cell != null && cell.value != null) { + Object v = cell.value; + String s = (v instanceof String) ? 
((String) v) : v.toString().intern(); + _clusterer.populate(s); + count(s); + } + return false; + } + + public List> getClusters() { + return _clusterer.getClusters(_radius); + } + } + + public void initializeFromJSON(Project project, JSONObject o) throws Exception { + super.initializeFromJSON(project, o); + _distance = _distances.get(o.getString("function").toLowerCase()); + } + + public void computeClusters(Engine engine) { + //VPTreeClusteringRowVisitor visitor = new VPTreeClusteringRowVisitor(_distance,_config); + BlockingClusteringRowVisitor visitor = new BlockingClusteringRowVisitor(_distance,_config); + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(_project, visitor); + + _clusters = visitor.getClusters(); + } + + public static class ValuesComparator implements Comparator>, Serializable { + private static final long serialVersionUID = 204469656070583155L; + public int compare(Entry o1, Entry o2) { + return o2.getValue() - o1.getValue(); + } + } + + public void write(JSONWriter writer, Properties options) throws JSONException { + writer.array(); + for (Set m : _clusters) { + if (m.size() > 1) { + Map internal_counts = new HashMap(); + for (Serializable s : m) { + internal_counts.put(s,_counts.get(s)); + } + List> values = new ArrayList>(internal_counts.entrySet()); + Collections.sort(values, new ValuesComparator()); + writer.array(); + for (Entry e : values) { + writer.object(); + writer.key("v"); writer.value(e.getKey()); + writer.key("c"); writer.value(e.getValue()); + writer.endObject(); + } + writer.endArray(); + } + } + writer.endArray(); + } + + private void count(Serializable s) { + if (_counts.containsKey(s)) { + _counts.put(s, _counts.get(s) + 1); + } else { + _counts.put(s, 1); + } + } +} diff --git a/main/src/com/google/refine/commands/Command.java b/main/src/com/google/refine/commands/Command.java new file mode 100644 index 000000000..3a44ddc0c --- /dev/null +++ b/main/src/com/google/refine/commands/Command.java @@ 
-0,0 +1,272 @@ +package com.google.refine.commands; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.io.Writer; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.GridworksServlet; +import com.google.refine.Jsonizable; +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.browsing.Engine; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.Project; +import com.google.refine.process.Process; +import com.google.refine.util.ParsingUtilities; + +/** + * The super class of all calls that the client side can invoke, most of which + * are AJAX calls. + */ +public abstract class Command { + + final static protected Logger logger = LoggerFactory.getLogger("command"); + + protected GridworksServlet servlet; + + public void init(GridworksServlet servlet) { + this.servlet = servlet; + } + + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + throw new UnsupportedOperationException(); + }; + + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + throw new UnsupportedOperationException(); + }; + + /** + * Utility function to get the browsing engine's configuration as a JSON object + * from the "engine" request parameter, most often in the POST body. 
+ * + * @param request + * @return + * @throws JSONException + */ + static protected JSONObject getEngineConfig(HttpServletRequest request) + throws JSONException { + if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); + + String json = request.getParameter("engine"); + try{ + return (json == null) ? null : ParsingUtilities.evaluateJsonStringToObject(json); + } catch (JSONException e){ + logger.debug( json + " could not be parsed to JSON"); + return null; + } + } + + /** + * Utility function to reconstruct the browsing engine from the "engine" request parameter, + * most often in the POST body. + * + * @param request + * @param project + * @return + * @throws Exception + */ + static protected Engine getEngine(HttpServletRequest request, Project project) + throws Exception { + if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); + if (project == null) throw new IllegalArgumentException("parameter 'project' should not be null"); + + Engine engine = new Engine(project); + JSONObject o = getEngineConfig(request); + if (o != null) + engine.initializeFromJSON(o); + return engine; + } + + /** + * Utility method for retrieving the Project object having the ID specified + * in the "project" URL parameter. + * + * @param request + * @return + * @throws ServletException + */ + protected Project getProject(HttpServletRequest request) throws ServletException { + if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); + try { + Project p = ProjectManager.singleton.getProject(Long.parseLong(request.getParameter("project"))); + if (p != null) { + return p; + } + } catch (Exception e) { + // ignore + } + throw new ServletException("Can't find project: missing or bad URL parameter"); + } + + /** + * Utility method for retrieving the ProjectMetadata object having the ID specified + * in the "project" URL parameter. 
+ * + * @param request + * @return + * @throws ServletException + */ + protected ProjectMetadata getProjectMetadata(HttpServletRequest request) throws ServletException { + if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); + try { + ProjectMetadata pm = ProjectManager.singleton.getProjectMetadata(Long.parseLong(request.getParameter("project"))); + if (pm != null) { + return pm; + } + } catch (Exception e) { + // ignore + } + throw new ServletException("Can't find project metadata: missing or bad URL parameter"); + } + + static protected int getIntegerParameter(HttpServletRequest request, String name, int def) { + if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); + try { + return Integer.parseInt(request.getParameter(name)); + } catch (Exception e) { + // ignore + } + return def; + } + + static protected JSONObject getJsonParameter(HttpServletRequest request, String name) { + if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null"); + String value = request.getParameter(name); + if (value != null) { + try { + return ParsingUtilities.evaluateJsonStringToObject(value); + } catch (JSONException e) { + logger.warn("error getting json parameter",e); + } + } + return null; + } + + static protected void performProcessAndRespond( + HttpServletRequest request, + HttpServletResponse response, + Project project, + Process process + ) throws Exception { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + HistoryEntry historyEntry = project.processManager.queueProcess(process); + if (historyEntry != null) { + Writer w = response.getWriter(); + JSONWriter writer = new JSONWriter(w); + Properties options = new Properties(); + + writer.object(); + writer.key("code"); writer.value("ok"); + writer.key("historyEntry"); historyEntry.write(writer, options); + writer.endObject(); + + w.flush(); + 
w.close(); + } else { + respond(response, "{ \"code\" : \"pending\" }"); + } + } + + static protected void respond(HttpServletResponse response, String content) + throws IOException, ServletException { + + response.setCharacterEncoding("UTF-8"); + response.setStatus(HttpServletResponse.SC_OK); + Writer w = response.getWriter(); + if (w != null) { + w.write(content); + w.flush(); + w.close(); + } else { + throw new ServletException("response returned a null writer"); + } + } + + static protected void respond(HttpServletResponse response, String status, String message) + throws IOException, JSONException { + + Writer w = response.getWriter(); + JSONWriter writer = new JSONWriter(w); + writer.object(); + writer.key("status"); writer.value(status); + writer.key("message"); writer.value(message); + writer.endObject(); + w.flush(); + w.close(); + } + + static protected void respondJSON(HttpServletResponse response, Jsonizable o) + throws IOException, JSONException { + + respondJSON(response, o, new Properties()); + } + + static protected void respondJSON( + HttpServletResponse response, Jsonizable o, Properties options) + throws IOException, JSONException { + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + Writer w = response.getWriter(); + JSONWriter writer = new JSONWriter(w); + + o.write(writer, options); + w.flush(); + w.close(); + } + + static protected void respondException(HttpServletResponse response, Exception e) + throws IOException, ServletException { + + logger.warn("Exception caught", e); + + if (response == null) { + throw new ServletException("Response object can't be null"); + } + + try { + JSONObject o = new JSONObject(); + o.put("code", "error"); + o.put("message", e.getMessage()); + + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + e.printStackTrace(pw); + pw.flush(); + sw.flush(); + + o.put("stack", sw.toString()); + + response.setCharacterEncoding("UTF-8"); + 
response.setHeader("Content-Type", "application/json"); + respond(response, o.toString()); + } catch (JSONException e1) { + e.printStackTrace(response.getWriter()); + } + } + + static protected void redirect(HttpServletResponse response, String url) throws IOException { + response.sendRedirect(url); + } + +} diff --git a/main/src/com/google/refine/commands/EngineDependentCommand.java b/main/src/com/google/refine/commands/EngineDependentCommand.java new file mode 100644 index 000000000..f46a3d61c --- /dev/null +++ b/main/src/com/google/refine/commands/EngineDependentCommand.java @@ -0,0 +1,56 @@ +package com.google.refine.commands; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONObject; + +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.process.Process; + +/** + * Convenient super class for commands that perform abstract operations on + * only the filtered rows based on the faceted browsing engine's configuration + * on the client side. + * + * The engine's configuration is passed over as a POST body parameter. It is + * retrieved, de-serialized, and used to construct the abstract operation. + * The operation is then used to construct a process. The process is then + * queued for execution. If the process is not long running and there is no + * other queued process, then it gets executed right away, resulting in some + * change to the history. Otherwise, it is pending. The client side can + * decide how to update its UI depending on whether the process is done or + * still pending. + * + * Note that there are interactions on the client side that change only + * individual cells or individual rows (such as starring one row or editing + * the text of one cell). 
These interactions do not depend on the faceted + * browsing engine's configuration, and so they don't invoke commands that + * subclass this class. See AnnotateOneRowCommand and EditOneCellCommand as + * examples. + */ +abstract public class EngineDependentCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + AbstractOperation op = createOperation(project, request, getEngineConfig(request)); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } + } + + abstract protected AbstractOperation createOperation( + Project project, HttpServletRequest request, JSONObject engineConfig) throws Exception; +} diff --git a/main/src/com/google/refine/commands/GetAllPreferencesCommand.java b/main/src/com/google/refine/commands/GetAllPreferencesCommand.java new file mode 100644 index 000000000..3c5c97751 --- /dev/null +++ b/main/src/com/google/refine/commands/GetAllPreferencesCommand.java @@ -0,0 +1,48 @@ +package com.google.refine.commands; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.ProjectManager; +import com.google.refine.model.Project; +import com.google.refine.preference.PreferenceStore; + +public class GetAllPreferencesCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + Project project = request.getParameter("project") != null ? getProject(request) : null; + PreferenceStore ps = project != null ? 
+ project.getMetadata().getPreferenceStore() : + ProjectManager.singleton.getPreferenceStore(); + + try { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + JSONWriter writer = new JSONWriter(response.getWriter()); + + writer.object(); + + for (String key : ps.getKeys()) { + Object pref = ps.get(key); + if (pref == null || pref instanceof String || pref instanceof Number || pref instanceof Boolean) { + writer.key(key); + writer.value(pref); + } + } + + writer.endObject(); + } catch (JSONException e) { + respondException(response, e); + } + } + +} diff --git a/main/src/com/google/refine/commands/GetPreferenceCommand.java b/main/src/com/google/refine/commands/GetPreferenceCommand.java new file mode 100644 index 000000000..a79abfd90 --- /dev/null +++ b/main/src/com/google/refine/commands/GetPreferenceCommand.java @@ -0,0 +1,54 @@ +package com.google.refine.commands; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.ProjectManager; +import com.google.refine.model.Project; +import com.google.refine.preference.PreferenceStore; +import com.google.refine.preference.TopList; + +public class GetPreferenceCommand extends Command { + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + Project project = request.getParameter("project") != null ? getProject(request) : null; + PreferenceStore ps = project != null ? 
+ project.getMetadata().getPreferenceStore() : + ProjectManager.singleton.getPreferenceStore(); + + String prefName = request.getParameter("name"); + Object pref = ps.get(prefName); + + try { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + JSONWriter writer = new JSONWriter(response.getWriter()); + + writer.object(); + writer.key("value"); + if (pref == null || pref instanceof String || pref instanceof Number || pref instanceof Boolean) { + writer.value(pref); + } else if (pref instanceof TopList) { + TopList tl = (TopList) pref; + tl.write(writer, new Properties()); + } else { + writer.value(pref.toString()); + } + + writer.endObject(); + } catch (JSONException e) { + respondException(response, e); + } + } + +} diff --git a/main/src/com/google/refine/commands/OpenWorkspaceDirCommand.java b/main/src/com/google/refine/commands/OpenWorkspaceDirCommand.java new file mode 100644 index 000000000..2a87a34aa --- /dev/null +++ b/main/src/com/google/refine/commands/OpenWorkspaceDirCommand.java @@ -0,0 +1,37 @@ +package com.google.refine.commands; + +import java.io.File; +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.ProjectManager; +import com.google.refine.io.FileProjectManager; + +public class OpenWorkspaceDirCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + String serverName = request.getServerName(); + + if (!"127.0.0.1".equals(serverName) && !"localhost".equals(serverName)) { + respond(response, "{ \"code\" : \"error\", \"message\" : \"Workspace directory can only be opened on the local machine where Gridworks is run.\" }"); + } else if (ProjectManager.singleton instanceof FileProjectManager) { + File dir = ((FileProjectManager) 
ProjectManager.singleton).getWorkspaceDir(); + + Runtime.getRuntime().exec( + "open .", + new String[] {}, + dir + ); + + respond(response, "{ \"code\" : \"ok\" }"); + } else { + respond(response, "{ \"code\" : \"error\", \"message\" : \"Workspace is not stored on the file system.\" }"); + } + } + +} diff --git a/main/src/com/google/refine/commands/SetPreferenceCommand.java b/main/src/com/google/refine/commands/SetPreferenceCommand.java new file mode 100644 index 000000000..6d316ea9e --- /dev/null +++ b/main/src/com/google/refine/commands/SetPreferenceCommand.java @@ -0,0 +1,40 @@ +package com.google.refine.commands; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; +import org.json.JSONTokener; + +import com.google.refine.ProjectManager; +import com.google.refine.model.Project; +import com.google.refine.preference.PreferenceStore; + +public class SetPreferenceCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + Project project = request.getParameter("project") != null ? getProject(request) : null; + PreferenceStore ps = project != null ? + project.getMetadata().getPreferenceStore() : + ProjectManager.singleton.getPreferenceStore(); + + String prefName = request.getParameter("name"); + String valueString = request.getParameter("value"); + + try { + Object o = valueString == null ? 
null : new JSONTokener(valueString).nextValue(); + + ps.put(prefName, PreferenceStore.loadObject(o)); + + respond(response, "{ \"code\" : \"ok\" }"); + } catch (JSONException e) { + respondException(response, e); + } + } + +} diff --git a/main/src/com/google/refine/commands/auth/AuthorizeCommand.java b/main/src/com/google/refine/commands/auth/AuthorizeCommand.java new file mode 100644 index 000000000..bb822f628 --- /dev/null +++ b/main/src/com/google/refine/commands/auth/AuthorizeCommand.java @@ -0,0 +1,134 @@ +package com.google.refine.commands.auth; + +import java.io.IOException; +import java.io.PrintWriter; +import java.net.URI; +import java.net.URISyntaxException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import oauth.signpost.OAuthConsumer; +import oauth.signpost.OAuthProvider; + +import com.google.refine.commands.Command; +import com.google.refine.oauth.Credentials; +import com.google.refine.oauth.OAuthUtilities; +import com.google.refine.oauth.Provider; + +public class AuthorizeCommand extends Command { + + private static final String OAUTH_VERIFIER_PARAM = "oauth_verifier"; + + public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + + // get the provider from the request + Provider provider = OAuthUtilities.getProvider(request); + + try { + + // see if the request comes with access credentials + Credentials access_credentials = Credentials.getCredentials(request, provider, Credentials.Type.ACCESS); + + // prepare the continuation URL that the OAuth provider will redirect the user to + // (we need to make sure this URL points back to this code or the dance will never complete) + String callbackURL = getBaseURL(request,provider); + + if (access_credentials == null) { + // access credentials are not available so we need to check + // to see at what stage of the OAuth dance we are + + // get the request 
token credentials + Credentials request_credentials = Credentials.getCredentials(request, provider, Credentials.Type.REQUEST); + + OAuthConsumer consumer = OAuthUtilities.getConsumer(request_credentials, provider); + OAuthProvider pp = OAuthUtilities.getOAuthProvider(provider); + + if (request_credentials == null) { + // no credentials were found, so let's start the dance + + // get the request token + + String url = pp.retrieveRequestToken(consumer, callbackURL); + + request_credentials = new Credentials(consumer.getToken(), consumer.getTokenSecret(), provider); + + // and set them to that we can retrieve them later in the second part of the dance + Credentials.setCredentials(request, response, request_credentials, Credentials.Type.REQUEST, 3600); + + // now redirect the user to the Authorize URL where she can authenticate against the + // service provider and authorize us. + // The provider will bounce the user back here for us to continue the dance. + + response.sendRedirect(url); + } else { + // we are at the second stage of the dance, so we need need to obtain the access credentials now + + // if we got here, it means that the user performed a valid authentication against the + // service provider and authorized us, so now we can request more permanent credentials + // to the service provider and save those as well for later use. 
+ + // this is set only for OAuth 1.0a + String verificationCode = request.getParameter(OAUTH_VERIFIER_PARAM); + + pp.retrieveAccessToken(consumer, verificationCode); + + access_credentials = new Credentials(consumer.getToken(), consumer.getTokenSecret(), provider); + + // no matter the result, we need to remove the request token + Credentials.deleteCredentials(request, response, provider, Credentials.Type.REQUEST); + + Credentials.setCredentials(request, response, access_credentials, Credentials.Type.ACCESS, 30 * 24 * 3600); + + finish(response); + } + } else { + finish(response); + } + } catch (Exception e) { + Credentials.deleteCredentials(request, response, provider, Credentials.Type.REQUEST); + Credentials.deleteCredentials(request, response, provider, Credentials.Type.ACCESS); + respondException(response, e); + } + } + + private void finish(HttpServletResponse response) throws IOException { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "text/html"); + + PrintWriter writer = response.getWriter(); + writer.write( + "" + + "" + + "" + + "" + ); + writer.flush(); + } + + private String getBaseURL(HttpServletRequest request, Provider provider) { + String host = request.getHeader("host"); + if (host == null) { + String referrer = request.getHeader("referer"); + if (referrer != null) { + URI url; + try { + url = new URI(referrer); + int port = url.getPort(); + host = url.getHost() + ((port > -1) ? 
":" + url.getPort() : ""); + } catch (URISyntaxException e) { + throw new RuntimeException("referrer '" + referrer + "' can't be parsed as a URL"); + } + } else { + throw new RuntimeException("neither the 'host' nor 'referer' headers were present in the HTTP response, I can't determine what URL gridworks is listening to."); + } + } + return "http://" + host + "/command/core/authorize/" + provider.getHost(); + } +} diff --git a/main/src/com/google/refine/commands/auth/CheckAuthorizationCommand.java b/main/src/com/google/refine/commands/auth/CheckAuthorizationCommand.java new file mode 100644 index 000000000..e6dd3f3d9 --- /dev/null +++ b/main/src/com/google/refine/commands/auth/CheckAuthorizationCommand.java @@ -0,0 +1,47 @@ +package com.google.refine.commands.auth; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.commands.Command; +import com.google.refine.oauth.Credentials; +import com.google.refine.oauth.OAuthUtilities; +import com.google.refine.oauth.Provider; +import com.google.refine.util.FreebaseUtils; + +public class CheckAuthorizationCommand extends Command { + + final static Logger logger = LoggerFactory.getLogger("check-authorization_command"); + + public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + + try { + Provider provider = OAuthUtilities.getProvider(request); + + // this cookie should not be there, but this is good hygiene practice + Credentials.deleteCredentials(request, response, provider, Credentials.Type.REQUEST); + + Credentials access_credentials = Credentials.getCredentials(request, provider, Credentials.Type.ACCESS); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + if (access_credentials != null) { + String user_info 
= FreebaseUtils.getUserInfo(access_credentials, provider); + response.getWriter().write(user_info); + } else { + respond(response, "401 Unauthorized", "You don't have the right credentials"); + } + } catch (Exception e) { + logger.info("error",e); + respondException(response, e); + } + } + +} diff --git a/main/src/com/google/refine/commands/auth/DeAuthorizeCommand.java b/main/src/com/google/refine/commands/auth/DeAuthorizeCommand.java new file mode 100644 index 000000000..f21a8da37 --- /dev/null +++ b/main/src/com/google/refine/commands/auth/DeAuthorizeCommand.java @@ -0,0 +1,31 @@ +package com.google.refine.commands.auth; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.oauth.Credentials; +import com.google.refine.oauth.OAuthUtilities; +import com.google.refine.oauth.Provider; + +public class DeAuthorizeCommand extends Command { + + public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + + try { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + Provider provider = OAuthUtilities.getProvider(request); + + Credentials.deleteCredentials(request, response, provider, Credentials.Type.ACCESS); + + respond(response, "200 OK", ""); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/auth/GetUserBadgesCommand.java b/main/src/com/google/refine/commands/auth/GetUserBadgesCommand.java new file mode 100644 index 000000000..70979702e --- /dev/null +++ b/main/src/com/google/refine/commands/auth/GetUserBadgesCommand.java @@ -0,0 +1,36 @@ +package com.google.refine.commands.auth; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import 
javax.servlet.http.HttpServletResponse; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.commands.Command; +import com.google.refine.oauth.OAuthUtilities; +import com.google.refine.oauth.Provider; +import com.google.refine.util.FreebaseUtils; + +public class GetUserBadgesCommand extends Command { + + final static Logger logger = LoggerFactory.getLogger("check-authorization_command"); + + public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + + try { + Provider provider = OAuthUtilities.getProvider(request); + String user_id = request.getParameter("user_id"); + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + String user_badges = FreebaseUtils.getUserBadges(provider, user_id); + response.getWriter().write(user_badges); + } catch (Exception e) { + logger.info("error",e); + respondException(response, e); + } + } + +} diff --git a/main/src/com/google/refine/commands/browsing/ComputeClustersCommand.java b/main/src/com/google/refine/commands/browsing/ComputeClustersCommand.java new file mode 100644 index 000000000..40c30157a --- /dev/null +++ b/main/src/com/google/refine/commands/browsing/ComputeClustersCommand.java @@ -0,0 +1,53 @@ +package com.google.refine.commands.browsing; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.browsing.Engine; +import com.google.refine.clustering.Clusterer; +import com.google.refine.clustering.binning.BinningClusterer; +import com.google.refine.clustering.knn.kNNClusterer; +import com.google.refine.commands.Command; +import com.google.refine.model.Project; + +public class ComputeClustersCommand extends Command { + + final static Logger logger = 
LoggerFactory.getLogger("compute-clusters_command"); + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + long start = System.currentTimeMillis(); + Project project = getProject(request); + Engine engine = getEngine(request, project); + JSONObject clusterer_conf = getJsonParameter(request,"clusterer"); + + Clusterer clusterer = null; + String type = clusterer_conf.has("type") ? clusterer_conf.getString("type") : "binning"; + + if ("knn".equals(type)) { + clusterer = new kNNClusterer(); + } else { + clusterer = new BinningClusterer(); + } + + clusterer.initializeFromJSON(project, clusterer_conf); + + clusterer.computeClusters(engine); + + respondJSON(response, clusterer); + logger.info("computed clusters [{},{}] in {}ms", new Object[] { type, clusterer_conf.getString("function"), Long.toString(System.currentTimeMillis() - start) }); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/browsing/ComputeFacetsCommand.java b/main/src/com/google/refine/commands/browsing/ComputeFacetsCommand.java new file mode 100644 index 000000000..0cdffbb56 --- /dev/null +++ b/main/src/com/google/refine/commands/browsing/ComputeFacetsCommand.java @@ -0,0 +1,29 @@ +package com.google.refine.commands.browsing; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.browsing.Engine; +import com.google.refine.commands.Command; +import com.google.refine.model.Project; + +public class ComputeFacetsCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + Engine engine = getEngine(request, project); + + engine.computeFacets(); + + 
respondJSON(response, engine); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/browsing/GetScatterplotCommand.java b/main/src/com/google/refine/commands/browsing/GetScatterplotCommand.java new file mode 100644 index 000000000..40a21d639 --- /dev/null +++ b/main/src/com/google/refine/commands/browsing/GetScatterplotCommand.java @@ -0,0 +1,175 @@ +package com.google.refine.commands.browsing; + +import java.awt.Color; +import java.awt.image.BufferedImage; +import java.io.IOException; +import java.io.OutputStream; + +import javax.imageio.ImageIO; +import javax.servlet.ServletException; +import javax.servlet.ServletOutputStream; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; +import org.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.facets.ScatterplotDrawingRowVisitor; +import com.google.refine.browsing.facets.ScatterplotFacet; +import com.google.refine.browsing.util.NumericBinIndex; +import com.google.refine.commands.Command; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.ParsingException; +import com.google.refine.model.Column; +import com.google.refine.model.Project; + +public class GetScatterplotCommand extends Command { + + final static Logger logger = LoggerFactory.getLogger("get-scatterplot_command"); + + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + long start = System.currentTimeMillis(); + + Project project = getProject(request); + Engine engine = getEngine(request, project); + JSONObject conf = getJsonParameter(request,"plotter"); + + response.setHeader("Content-Type", "image/png"); + + 
ServletOutputStream sos = null; + + try { + sos = response.getOutputStream(); + draw(sos, project, engine, conf); + } finally { + sos.close(); + } + + logger.trace("Drawn scatterplot in {} ms", Long.toString(System.currentTimeMillis() - start)); + } catch (Exception e) { + e.printStackTrace(); + respondException(response, e); + } + } + + public void draw(OutputStream output, Project project, Engine engine, JSONObject o) throws IOException, JSONException { + + double min_x = 0; + double min_y = 0; + double max_x = 0; + double max_y = 0; + + int columnIndex_x = 0; + int columnIndex_y = 0; + + Evaluable eval_x = null; + Evaluable eval_y = null; + + int size = (o.has(ScatterplotFacet.SIZE)) ? o.getInt(ScatterplotFacet.SIZE) : 100; + + double dot = (o.has(ScatterplotFacet.DOT)) ? o.getDouble(ScatterplotFacet.DOT) : 100; + + int dim_x = (o.has(ScatterplotFacet.DIM_X)) ? ScatterplotFacet.getAxisDim(o.getString(ScatterplotFacet.DIM_X)) : ScatterplotFacet.LIN; + int dim_y = (o.has(ScatterplotFacet.DIM_Y)) ? ScatterplotFacet.getAxisDim(o.getString(ScatterplotFacet.DIM_Y)) : ScatterplotFacet.LIN; + + int rotation = (o.has(ScatterplotFacet.ROTATION)) ? ScatterplotFacet.getRotation(o.getString(ScatterplotFacet.ROTATION)) : ScatterplotFacet.NO_ROTATION; + + String color_str = (o.has(ScatterplotFacet.COLOR)) ? o.getString(ScatterplotFacet.COLOR) : "000000"; + Color color = new Color(Integer.parseInt(color_str,16)); + + String base_color_str = (o.has(ScatterplotFacet.BASE_COLOR)) ? o.getString(ScatterplotFacet.BASE_COLOR) : null; + Color base_color = base_color_str != null ? new Color(Integer.parseInt(base_color_str,16)) : null; + + String columnName_x = o.getString(ScatterplotFacet.X_COLUMN_NAME); + String expression_x = (o.has(ScatterplotFacet.X_EXPRESSION)) ? 
o.getString(ScatterplotFacet.X_EXPRESSION) : "value"; + + if (columnName_x.length() > 0) { + Column x_column = project.columnModel.getColumnByName(columnName_x); + if (x_column != null) { + columnIndex_x = x_column.getCellIndex(); + } + } else { + columnIndex_x = -1; + } + + try { + eval_x = MetaParser.parse(expression_x); + } catch (ParsingException e) { + logger.warn("error parsing expression", e); + } + + String columnName_y = o.getString(ScatterplotFacet.Y_COLUMN_NAME); + String expression_y = (o.has(ScatterplotFacet.Y_EXPRESSION)) ? o.getString(ScatterplotFacet.Y_EXPRESSION) : "value"; + + if (columnName_y.length() > 0) { + Column y_column = project.columnModel.getColumnByName(columnName_y); + if (y_column != null) { + columnIndex_y = y_column.getCellIndex(); + } + } else { + columnIndex_y = -1; + } + + try { + eval_y = MetaParser.parse(expression_y); + } catch (ParsingException e) { + logger.warn("error parsing expression", e); + } + + NumericBinIndex index_x = null; + NumericBinIndex index_y = null; + + String col_x_name = o.getString(ScatterplotFacet.X_COLUMN_NAME); + Column column_x = project.columnModel.getColumnByName(col_x_name); + if (column_x != null) { + columnIndex_x = column_x.getCellIndex(); + index_x = ScatterplotFacet.getBinIndex(project, column_x, eval_x, expression_x); + min_x = index_x.getMin(); + max_x = index_x.getMax(); + } + + String col_y_name = o.getString(ScatterplotFacet.Y_COLUMN_NAME); + Column column_y = project.columnModel.getColumnByName(col_y_name); + if (column_y != null) { + columnIndex_y = column_y.getCellIndex(); + index_y = ScatterplotFacet.getBinIndex(project, column_y, eval_y, expression_y); + min_y = index_y.getMin(); + max_y = index_y.getMax(); + } + + if (index_x != null && index_y != null && index_x.isNumeric() && index_y.isNumeric()) { + ScatterplotDrawingRowVisitor drawer = new ScatterplotDrawingRowVisitor( + columnIndex_x, columnIndex_y, min_x, max_x, min_y, max_y, + size, dim_x, dim_y, rotation, dot, color + ); + + 
if (base_color != null) { + drawer.setColor(base_color); + + FilteredRows filteredRows = engine.getAllRows(); + filteredRows.accept(project, drawer); + + drawer.setColor(color); + } + + { + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(project, drawer); + } + + ImageIO.write(drawer.getImage(), "png", output); + } else { + ImageIO.write(new BufferedImage(1, 1, BufferedImage.TYPE_4BYTE_ABGR), "png", output); + } + + } + +} diff --git a/main/src/com/google/refine/commands/cell/BlankDownCommand.java b/main/src/com/google/refine/commands/cell/BlankDownCommand.java new file mode 100644 index 000000000..3de094e01 --- /dev/null +++ b/main/src/com/google/refine/commands/cell/BlankDownCommand.java @@ -0,0 +1,24 @@ +package com.google.refine.commands.cell; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.cell.BlankDownOperation; + +public class BlankDownCommand extends EngineDependentCommand { + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String columnName = request.getParameter("columnName"); + + return new BlankDownOperation( + engineConfig, + columnName + ); + } +} diff --git a/main/src/com/google/refine/commands/cell/EditOneCellCommand.java b/main/src/com/google/refine/commands/cell/EditOneCellCommand.java new file mode 100644 index 000000000..c6ca37b4e --- /dev/null +++ b/main/src/com/google/refine/commands/cell/EditOneCellCommand.java @@ -0,0 +1,128 @@ +package com.google.refine.commands.cell; + +import java.io.IOException; +import java.io.Serializable; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import 
javax.servlet.http.HttpServletResponse; + +import org.json.JSONWriter; + +import com.google.refine.commands.Command; +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.changes.CellChange; +import com.google.refine.process.QuickHistoryEntryProcess; +import com.google.refine.util.ParsingUtilities; +import com.google.refine.util.Pool; + +public class EditOneCellCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + request.setCharacterEncoding("UTF-8"); + response.setCharacterEncoding("UTF-8"); + + Project project = getProject(request); + + int rowIndex = Integer.parseInt(request.getParameter("row")); + int cellIndex = Integer.parseInt(request.getParameter("cell")); + + String type = request.getParameter("type"); + String valueString = request.getParameter("value"); + Serializable value = null; + + if ("number".equals(type)) { + value = Double.parseDouble(valueString); + } else if ("boolean".equals(type)) { + value = "true".equalsIgnoreCase(valueString); + } else if ("date".equals(type)) { + value = ParsingUtilities.stringToDate(valueString); + } else { + value = valueString; + } + + EditOneCellProcess process = new EditOneCellProcess( + project, + "Edit single cell", + rowIndex, + cellIndex, + value + ); + + HistoryEntry historyEntry = project.processManager.queueProcess(process); + if (historyEntry != null) { + /* + * If the operation has been done, return the new cell's data + * so the client side can update the cell's rendering right away. 
+ */ + JSONWriter writer = new JSONWriter(response.getWriter()); + + Pool pool = new Pool(); + Properties options = new Properties(); + options.put("pool", pool); + + writer.object(); + writer.key("code"); writer.value("ok"); + writer.key("historyEntry"); historyEntry.write(writer, options); + writer.key("cell"); process.newCell.write(writer, options); + writer.key("pool"); pool.write(writer, options); + writer.endObject(); + } else { + respond(response, "{ \"code\" : \"pending\" }"); + } + } catch (Exception e) { + respondException(response, e); + } + } + + protected static class EditOneCellProcess extends QuickHistoryEntryProcess { + final int rowIndex; + final int cellIndex; + final Serializable value; + Cell newCell; + + EditOneCellProcess( + Project project, + String briefDescription, + int rowIndex, + int cellIndex, + Serializable value + ) { + super(project, briefDescription); + + this.rowIndex = rowIndex; + this.cellIndex = cellIndex; + this.value = value; + } + + protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception { + Cell cell = _project.rows.get(rowIndex).getCell(cellIndex); + Column column = _project.columnModel.getColumnByCellIndex(cellIndex); + if (column == null) { + throw new Exception("No such column"); + } + + newCell = new Cell( + value, + cell != null ? 
cell.recon : null + ); + + String description = + "Edit single cell on row " + (rowIndex + 1) + + ", column " + column.getName(); + + Change change = new CellChange(rowIndex, cellIndex, cell, newCell); + + return new HistoryEntry( + historyEntryID, _project, description, null, change); + } + } +} diff --git a/main/src/com/google/refine/commands/cell/FillDownCommand.java b/main/src/com/google/refine/commands/cell/FillDownCommand.java new file mode 100644 index 000000000..62445ea1b --- /dev/null +++ b/main/src/com/google/refine/commands/cell/FillDownCommand.java @@ -0,0 +1,24 @@ +package com.google.refine.commands.cell; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.cell.FillDownOperation; + +public class FillDownCommand extends EngineDependentCommand { + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String columnName = request.getParameter("columnName"); + + return new FillDownOperation( + engineConfig, + columnName + ); + } +} diff --git a/main/src/com/google/refine/commands/cell/JoinMultiValueCellsCommand.java b/main/src/com/google/refine/commands/cell/JoinMultiValueCellsCommand.java new file mode 100644 index 000000000..7e348f8b6 --- /dev/null +++ b/main/src/com/google/refine/commands/cell/JoinMultiValueCellsCommand.java @@ -0,0 +1,36 @@ +package com.google.refine.commands.cell; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import 
com.google.refine.operations.cell.MultiValuedCellJoinOperation; +import com.google.refine.process.Process; + +public class JoinMultiValueCellsCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + String columnName = request.getParameter("columnName"); + String keyColumnName = request.getParameter("keyColumnName"); + String separator = request.getParameter("separator"); + + AbstractOperation op = new MultiValuedCellJoinOperation(columnName, keyColumnName, separator); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/cell/MassEditCommand.java b/main/src/com/google/refine/commands/cell/MassEditCommand.java new file mode 100644 index 000000000..5e853a0df --- /dev/null +++ b/main/src/com/google/refine/commands/cell/MassEditCommand.java @@ -0,0 +1,29 @@ +package com.google.refine.commands.cell; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.cell.MassEditOperation; +import com.google.refine.util.ParsingUtilities; + +public class MassEditCommand extends EngineDependentCommand { + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String columnName = request.getParameter("columnName"); + String expression = request.getParameter("expression"); + String editsString = request.getParameter("edits"); + + return new MassEditOperation( + engineConfig, + columnName, + expression, + 
MassEditOperation.reconstructEdits(ParsingUtilities.evaluateJsonStringToArray(editsString)) + ); + } +} diff --git a/main/src/com/google/refine/commands/cell/SplitMultiValueCellsCommand.java b/main/src/com/google/refine/commands/cell/SplitMultiValueCellsCommand.java new file mode 100644 index 000000000..f7e45d130 --- /dev/null +++ b/main/src/com/google/refine/commands/cell/SplitMultiValueCellsCommand.java @@ -0,0 +1,37 @@ +package com.google.refine.commands.cell; + + import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.cell.MultiValuedCellSplitOperation; +import com.google.refine.process.Process; + +public class SplitMultiValueCellsCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + String columnName = request.getParameter("columnName"); + String keyColumnName = request.getParameter("keyColumnName"); + String separator = request.getParameter("separator"); + String mode = request.getParameter("mode"); + + AbstractOperation op = new MultiValuedCellSplitOperation(columnName, keyColumnName, separator, mode); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/cell/TextTransformCommand.java b/main/src/com/google/refine/commands/cell/TextTransformCommand.java new file mode 100644 index 000000000..3a516e1d7 --- /dev/null +++ b/main/src/com/google/refine/commands/cell/TextTransformCommand.java @@ -0,0 +1,38 
@@ +package com.google.refine.commands.cell; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.cell.TextTransformOperation; + +public class TextTransformCommand extends EngineDependentCommand { + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String columnName = request.getParameter("columnName"); + String expression = request.getParameter("expression"); + String onError = request.getParameter("onError"); + boolean repeat = "true".equals(request.getParameter("repeat")); + + int repeatCount = 10; + String repeatCountString = request.getParameter("repeatCount"); + try { + repeatCount = Math.max(Math.min(Integer.parseInt(repeatCountString), 10), 0); + } catch (Exception e) { + } + + return new TextTransformOperation( + engineConfig, + columnName, + expression, + TextTransformOperation.stringToOnError(onError), + repeat, + repeatCount + ); + } +} diff --git a/main/src/com/google/refine/commands/cell/TransposeColumnsIntoRowsCommand.java b/main/src/com/google/refine/commands/cell/TransposeColumnsIntoRowsCommand.java new file mode 100644 index 000000000..cb82efa5c --- /dev/null +++ b/main/src/com/google/refine/commands/cell/TransposeColumnsIntoRowsCommand.java @@ -0,0 +1,42 @@ +package com.google.refine.commands.cell; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.cell.TransposeColumnsIntoRowsOperation; +import 
com.google.refine.process.Process; + +public class TransposeColumnsIntoRowsCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + String startColumnName = request.getParameter("startColumnName"); + int columnCount = Integer.parseInt(request.getParameter("columnCount")); + String combinedColumnName = request.getParameter("combinedColumnName"); + + boolean prependColumnName = Boolean.parseBoolean(request.getParameter("prependColumnName")); + String separator = request.getParameter("separator"); + boolean ignoreBlankCells = Boolean.parseBoolean(request.getParameter("ignoreBlankCells")); + + AbstractOperation op = new TransposeColumnsIntoRowsOperation( + startColumnName, columnCount, combinedColumnName, prependColumnName, separator, ignoreBlankCells); + + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/cell/TransposeRowsIntoColumnsCommand.java b/main/src/com/google/refine/commands/cell/TransposeRowsIntoColumnsCommand.java new file mode 100644 index 000000000..ad05b6f09 --- /dev/null +++ b/main/src/com/google/refine/commands/cell/TransposeRowsIntoColumnsCommand.java @@ -0,0 +1,37 @@ +package com.google.refine.commands.cell; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.cell.TransposeRowsIntoColumnsOperation; +import com.google.refine.process.Process; + +public class TransposeRowsIntoColumnsCommand 
extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + String columnName = request.getParameter("columnName"); + int rowCount = Integer.parseInt(request.getParameter("rowCount")); + + AbstractOperation op = new TransposeRowsIntoColumnsOperation( + columnName, rowCount); + + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/column/AddColumnByFetchingURLsCommand.java b/main/src/com/google/refine/commands/column/AddColumnByFetchingURLsCommand.java new file mode 100644 index 000000000..52be38065 --- /dev/null +++ b/main/src/com/google/refine/commands/column/AddColumnByFetchingURLsCommand.java @@ -0,0 +1,36 @@ +package com.google.refine.commands.column; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.cell.TextTransformOperation; +import com.google.refine.operations.column.ColumnAdditionByFetchingURLsOperation; + +public class AddColumnByFetchingURLsCommand extends EngineDependentCommand { + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String baseColumnName = request.getParameter("baseColumnName"); + String urlExpression = request.getParameter("urlExpression"); + String newColumnName = request.getParameter("newColumnName"); + int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); + int delay = Integer.parseInt(request.getParameter("delay")); + String onError = 
request.getParameter("onError"); + + return new ColumnAdditionByFetchingURLsOperation( + engineConfig, + baseColumnName, + urlExpression, + TextTransformOperation.stringToOnError(onError), + newColumnName, + columnInsertIndex, + delay + ); + } + +} diff --git a/main/src/com/google/refine/commands/column/AddColumnCommand.java b/main/src/com/google/refine/commands/column/AddColumnCommand.java new file mode 100644 index 000000000..c6febbe94 --- /dev/null +++ b/main/src/com/google/refine/commands/column/AddColumnCommand.java @@ -0,0 +1,34 @@ +package com.google.refine.commands.column; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.cell.TextTransformOperation; +import com.google.refine.operations.column.ColumnAdditionOperation; + +public class AddColumnCommand extends EngineDependentCommand { + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String baseColumnName = request.getParameter("baseColumnName"); + String expression = request.getParameter("expression"); + String newColumnName = request.getParameter("newColumnName"); + int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); + String onError = request.getParameter("onError"); + + return new ColumnAdditionOperation( + engineConfig, + baseColumnName, + expression, + TextTransformOperation.stringToOnError(onError), + newColumnName, + columnInsertIndex + ); + } + +} diff --git a/main/src/com/google/refine/commands/column/ExtendDataCommand.java b/main/src/com/google/refine/commands/column/ExtendDataCommand.java new file mode 100644 index 000000000..adc882e05 --- /dev/null +++ b/main/src/com/google/refine/commands/column/ExtendDataCommand.java @@ -0,0 +1,32 @@ +package 
com.google.refine.commands.column; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.column.ExtendDataOperation; +import com.google.refine.util.ParsingUtilities; + +public class ExtendDataCommand extends EngineDependentCommand { + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String baseColumnName = request.getParameter("baseColumnName"); + int columnInsertIndex = Integer.parseInt(request.getParameter("columnInsertIndex")); + + String jsonString = request.getParameter("extension"); + JSONObject extension = ParsingUtilities.evaluateJsonStringToObject(jsonString); + + return new ExtendDataOperation( + engineConfig, + baseColumnName, + extension, + columnInsertIndex + ); + } + +} diff --git a/main/src/com/google/refine/commands/column/GetColumnsInfoCommand.java b/main/src/com/google/refine/commands/column/GetColumnsInfoCommand.java new file mode 100644 index 000000000..0188225dd --- /dev/null +++ b/main/src/com/google/refine/commands/column/GetColumnsInfoCommand.java @@ -0,0 +1,99 @@ +package com.google.refine.commands.column; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.browsing.util.ExpressionBasedRowEvaluable; +import com.google.refine.browsing.util.NumericBinIndex; +import com.google.refine.browsing.util.NumericBinRowIndex; +import com.google.refine.commands.Command; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.ParsingException; +import 
com.google.refine.model.Column; +import com.google.refine.model.Project; + +public class GetColumnsInfoCommand extends Command { + + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + //long start = System.currentTimeMillis(); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + Project project = getProject(request); + //Engine engine = getEngine(request, project); + + JSONWriter writer = new JSONWriter(response.getWriter()); + + writer.array(); + for (Column column : project.columnModel.columns) { + writer.object(); + write(project, column, writer); + writer.endObject(); + } + writer.endArray(); + + //Gridworks.log("Obtained columns info in " + (System.currentTimeMillis() - start) + "ms"); + } catch (Exception e) { + e.printStackTrace(); + respondException(response, e); + } + } + + private NumericBinIndex getBinIndex(Project project, Column column) { + String expression = "value"; + String key = "numeric-bin:" + expression; + Evaluable eval = null; + try { + eval = MetaParser.parse(expression); + } catch (ParsingException e) { + // this should never happen + } + NumericBinIndex index = (NumericBinIndex) column.getPrecompute(key); + if (index == null) { + index = new NumericBinRowIndex(project, new ExpressionBasedRowEvaluable(column.getName(), column.getCellIndex(), eval)); + column.setPrecompute(key, index); + } + return index; + } + + private void write(Project project, Column column, JSONWriter writer) throws JSONException { + NumericBinIndex columnIndex = getBinIndex(project, column); + if (columnIndex != null) { + writer.key("name"); + writer.value(column.getName()); + boolean is_numeric = columnIndex.isNumeric(); + writer.key("is_numeric"); + writer.value(is_numeric); + writer.key("numeric_row_count"); + writer.value(columnIndex.getNumericRowCount()); + writer.key("non_numeric_row_count"); + 
writer.value(columnIndex.getNonNumericRowCount()); + writer.key("error_row_count"); + writer.value(columnIndex.getErrorRowCount()); + writer.key("blank_row_count"); + writer.value(columnIndex.getBlankRowCount()); + if (is_numeric) { + writer.key("min"); + writer.value(columnIndex.getMin()); + writer.key("max"); + writer.value(columnIndex.getMax()); + writer.key("step"); + writer.value(columnIndex.getStep()); + } + } else { + writer.key("error"); + writer.value("error finding numeric information on the '" + column.getName() + "' column"); + } + } +} diff --git a/main/src/com/google/refine/commands/column/MoveColumnCommand.java b/main/src/com/google/refine/commands/column/MoveColumnCommand.java new file mode 100644 index 000000000..b7245dae3 --- /dev/null +++ b/main/src/com/google/refine/commands/column/MoveColumnCommand.java @@ -0,0 +1,35 @@ +package com.google.refine.commands.column; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.column.ColumnMoveOperation; +import com.google.refine.process.Process; + +public class MoveColumnCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + String columnName = request.getParameter("columnName"); + int index = Integer.parseInt(request.getParameter("index")); + + AbstractOperation op = new ColumnMoveOperation(columnName, index); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git 
a/main/src/com/google/refine/commands/column/PreviewExtendDataCommand.java b/main/src/com/google/refine/commands/column/PreviewExtendDataCommand.java new file mode 100644 index 000000000..57b7e04e2 --- /dev/null +++ b/main/src/com/google/refine/commands/column/PreviewExtendDataCommand.java @@ -0,0 +1,158 @@ +package com.google.refine.commands.column; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONArray; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.commands.Command; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.util.FreebaseDataExtensionJob; +import com.google.refine.util.ParsingUtilities; +import com.google.refine.util.FreebaseDataExtensionJob.ColumnInfo; +import com.google.refine.util.FreebaseDataExtensionJob.DataExtension; + +public class PreviewExtendDataCommand extends Command { + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + String columnName = request.getParameter("columnName"); + + String rowIndicesString = request.getParameter("rowIndices"); + if (rowIndicesString == null) { + respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }"); + return; + } + + String jsonString = request.getParameter("extension"); + JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString); + + JSONArray rowIndices = ParsingUtilities.evaluateJsonStringToArray(rowIndicesString); + int length = rowIndices.length(); + int 
cellIndex = project.columnModel.getColumnByName(columnName).getCellIndex(); + + List topicNames = new ArrayList(); + List topicIds = new ArrayList(); + Set ids = new HashSet(); + for (int i = 0; i < length; i++) { + int rowIndex = rowIndices.getInt(i); + if (rowIndex >= 0 && rowIndex < project.rows.size()) { + Row row = project.rows.get(rowIndex); + Cell cell = row.getCell(cellIndex); + if (cell != null && cell.recon != null && cell.recon.match != null) { + topicNames.add(cell.recon.match.name); + topicIds.add(cell.recon.match.id); + ids.add(cell.recon.match.id); + } else { + topicNames.add(null); + topicIds.add(null); + ids.add(null); + } + } + } + + Map reconCandidateMap = new HashMap(); + FreebaseDataExtensionJob job = new FreebaseDataExtensionJob(json); + Map map = job.extend(ids, reconCandidateMap); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + JSONWriter writer = new JSONWriter(response.getWriter()); + writer.object(); + writer.key("code"); writer.value("ok"); + writer.key("columns"); + writer.array(); + for (ColumnInfo info : job.columns) { + writer.object(); + writer.key("names"); + writer.array(); + for (String name : info.names) { + writer.value(name); + } + writer.endArray(); + writer.key("path"); + writer.array(); + for (String id : info.path) { + writer.value(id); + } + writer.endArray(); + writer.endObject(); + } + writer.endArray(); + + writer.key("rows"); + writer.array(); + for (int r = 0; r < topicNames.size(); r++) { + String id = topicIds.get(r); + String topicName = topicNames.get(r); + + if (id != null && map.containsKey(id)) { + DataExtension ext = map.get(id); + boolean first = true; + + if (ext.data.length > 0) { + for (Object[] row : ext.data) { + writer.array(); + if (first) { + writer.value(topicName); + first = false; + } else { + writer.value(null); + } + + for (Object cell : row) { + if (cell != null && cell instanceof ReconCandidate) { + ReconCandidate rc = (ReconCandidate) 
cell; + writer.object(); + writer.key("id"); writer.value(rc.id); + writer.key("name"); writer.value(rc.name); + writer.endObject(); + } else { + writer.value(cell); + } + } + + writer.endArray(); + } + continue; + } + } + + writer.array(); + if (id != null) { + writer.object(); + writer.key("id"); writer.value(id); + writer.key("name"); writer.value(topicName); + writer.endObject(); + } else { + writer.value(""); + } + writer.endArray(); + } + writer.endArray(); + + writer.endObject(); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/column/RemoveColumnCommand.java b/main/src/com/google/refine/commands/column/RemoveColumnCommand.java new file mode 100644 index 000000000..1365644b7 --- /dev/null +++ b/main/src/com/google/refine/commands/column/RemoveColumnCommand.java @@ -0,0 +1,34 @@ +package com.google.refine.commands.column; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.column.ColumnRemovalOperation; +import com.google.refine.process.Process; + +public class RemoveColumnCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + String columnName = request.getParameter("columnName"); + + AbstractOperation op = new ColumnRemovalOperation(columnName); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git 
a/main/src/com/google/refine/commands/column/RenameColumnCommand.java b/main/src/com/google/refine/commands/column/RenameColumnCommand.java new file mode 100644 index 000000000..1516dcd62 --- /dev/null +++ b/main/src/com/google/refine/commands/column/RenameColumnCommand.java @@ -0,0 +1,35 @@ +package com.google.refine.commands.column; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.column.ColumnRenameOperation; +import com.google.refine.process.Process; + +public class RenameColumnCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + String oldColumnName = request.getParameter("oldColumnName"); + String newColumnName = request.getParameter("newColumnName"); + + AbstractOperation op = new ColumnRenameOperation(oldColumnName, newColumnName); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/column/ReorderColumnsCommand.java b/main/src/com/google/refine/commands/column/ReorderColumnsCommand.java new file mode 100644 index 000000000..d841adb6e --- /dev/null +++ b/main/src/com/google/refine/commands/column/ReorderColumnsCommand.java @@ -0,0 +1,25 @@ +package com.google.refine.commands.column; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import 
com.google.refine.model.Project; +import com.google.refine.operations.column.ColumnReorderOperation; +import com.google.refine.util.JSONUtilities; +import com.google.refine.util.ParsingUtilities; + +public class ReorderColumnsCommand extends EngineDependentCommand { + + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String columnNames = request.getParameter("columnNames"); + return new ColumnReorderOperation( + JSONUtilities.toStringList( + ParsingUtilities.evaluateJsonStringToArray(columnNames))); + } +} diff --git a/main/src/com/google/refine/commands/column/SplitColumnCommand.java b/main/src/com/google/refine/commands/column/SplitColumnCommand.java new file mode 100644 index 000000000..7a04c2028 --- /dev/null +++ b/main/src/com/google/refine/commands/column/SplitColumnCommand.java @@ -0,0 +1,54 @@ +package com.google.refine.commands.column; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONArray; +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.column.ColumnSplitOperation; +import com.google.refine.util.ParsingUtilities; + +public class SplitColumnCommand extends EngineDependentCommand { + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String columnName = request.getParameter("columnName"); + boolean guessCellType = Boolean.parseBoolean(request.getParameter("guessCellType")); + boolean removeOriginalColumn = Boolean.parseBoolean(request.getParameter("removeOriginalColumn")); + String mode = request.getParameter("mode"); + if ("separator".equals(mode)) { + String maxColumns = request.getParameter("maxColumns"); + + return new ColumnSplitOperation( + engineConfig, + 
columnName, + guessCellType, + removeOriginalColumn, + request.getParameter("separator"), + Boolean.parseBoolean(request.getParameter("regex")), + maxColumns != null && maxColumns.length() > 0 ? Integer.parseInt(maxColumns) : 0 + ); + } else { + String s = request.getParameter("fieldLengths"); + + JSONArray a = ParsingUtilities.evaluateJsonStringToArray(s); + int[] fieldLengths = new int[a.length()]; + + for (int i = 0; i < fieldLengths.length; i++) { + fieldLengths[i] = a.getInt(i); + } + + return new ColumnSplitOperation( + engineConfig, + columnName, + guessCellType, + removeOriginalColumn, + fieldLengths + ); + } + } +} diff --git a/main/src/com/google/refine/commands/expr/GetExpressionHistoryCommand.java b/main/src/com/google/refine/commands/expr/GetExpressionHistoryCommand.java new file mode 100644 index 000000000..2ee431323 --- /dev/null +++ b/main/src/com/google/refine/commands/expr/GetExpressionHistoryCommand.java @@ -0,0 +1,68 @@ +package com.google.refine.commands.expr; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONWriter; + +import com.google.refine.ProjectManager; +import com.google.refine.commands.Command; +import com.google.refine.model.Project; +import com.google.refine.preference.TopList; + +public class GetExpressionHistoryCommand extends Command { + + static protected List toExpressionList(Object o) { + return o == null ? 
new ArrayList() : ((TopList) o).getList(); + } + + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + List localExpressions = toExpressionList(project.getMetadata().getPreferenceStore().get("scripting.expressions")); + localExpressions = localExpressions.size() > 20 ? localExpressions.subList(0, 20) : localExpressions; + + List globalExpressions = toExpressionList(ProjectManager.singleton.getPreferenceStore().get("scripting.expressions")); + + Set done = new HashSet(); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + JSONWriter writer = new JSONWriter(response.getWriter()); + writer.object(); + writer.key("expressions"); + writer.array(); + for (String s : localExpressions) { + writer.object(); + writer.key("code"); writer.value(s); + writer.key("global"); writer.value(false); + writer.endObject(); + done.add(s); + } + for (String s : globalExpressions) { + if (!done.contains(s)) { + writer.object(); + writer.key("code"); writer.value(s); + writer.key("global"); writer.value(true); + writer.endObject(); + } + } + writer.endArray(); + writer.endObject(); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/expr/GetExpressionLanguageInfoCommand.java b/main/src/com/google/refine/commands/expr/GetExpressionLanguageInfoCommand.java new file mode 100644 index 000000000..ba1a46c9e --- /dev/null +++ b/main/src/com/google/refine/commands/expr/GetExpressionLanguageInfoCommand.java @@ -0,0 +1,58 @@ +package com.google.refine.commands.expr; + +import java.io.IOException; +import java.util.Properties; +import java.util.Map.Entry; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONWriter; + +import 
com.google.refine.commands.Command; +import com.google.refine.gel.Control; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class GetExpressionLanguageInfoCommand extends Command { + + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + JSONWriter writer = new JSONWriter(response.getWriter()); + Properties options = new Properties(); + + writer.object(); + + writer.key("functions"); + writer.object(); + { + for (Entry entry : ControlFunctionRegistry.getFunctionMapping()) { + writer.key(entry.getKey()); + entry.getValue().write(writer, options); + } + } + writer.endObject(); + + writer.key("controls"); + writer.object(); + { + for (Entry entry : ControlFunctionRegistry.getControlMapping()) { + writer.key(entry.getKey()); + entry.getValue().write(writer, options); + } + } + writer.endObject(); + + writer.endObject(); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/expr/LogExpressionCommand.java b/main/src/com/google/refine/commands/expr/LogExpressionCommand.java new file mode 100644 index 000000000..52eff3997 --- /dev/null +++ b/main/src/com/google/refine/commands/expr/LogExpressionCommand.java @@ -0,0 +1,37 @@ +package com.google.refine.commands.expr; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.ProjectManager; +import com.google.refine.commands.Command; +import com.google.refine.model.Project; +import com.google.refine.preference.TopList; + +public class LogExpressionCommand extends Command { + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws 
ServletException, IOException { + + try { + Project project = getProject(request); + String expression = request.getParameter("expression"); + + ((TopList) project.getMetadata().getPreferenceStore().get("scripting.expressions")) + .add(expression); + + ((TopList) ProjectManager.singleton.getPreferenceStore().get("scripting.expressions")) + .add(expression); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + response.getWriter().write("{ \"code\" : \"ok\" }"); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/expr/PreviewExpressionCommand.java b/main/src/com/google/refine/commands/expr/PreviewExpressionCommand.java new file mode 100644 index 000000000..2add36437 --- /dev/null +++ b/main/src/com/google/refine/commands/expr/PreviewExpressionCommand.java @@ -0,0 +1,205 @@ +package com.google.refine.commands.expr; + +import java.io.IOException; +import java.io.Serializable; +import java.util.Calendar; +import java.util.Date; +import java.util.List; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.commands.Command; +import com.google.refine.expr.EvalError; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.expr.HasFields; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.ParsingException; +import com.google.refine.expr.WrappedCell; +import com.google.refine.expr.WrappedRow; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.ParsingUtilities; + +public class PreviewExpressionCommand extends Command { + + 
@Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + int cellIndex = Integer.parseInt(request.getParameter("cellIndex")); + String columnName = cellIndex < 0 ? "" : project.columnModel.getColumnByCellIndex(cellIndex).getName(); + + String expression = request.getParameter("expression"); + String rowIndicesString = request.getParameter("rowIndices"); + if (rowIndicesString == null) { + respond(response, "{ \"code\" : \"error\", \"message\" : \"No row indices specified\" }"); + return; + } + + boolean repeat = "true".equals(request.getParameter("repeat")); + int repeatCount = 10; + if (repeat) { + String repeatCountString = request.getParameter("repeatCount"); + try { + repeatCount = Math.max(Math.min(Integer.parseInt(repeatCountString), 10), 0); + } catch (Exception e) { + } + } + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + JSONArray rowIndices = ParsingUtilities.evaluateJsonStringToArray(rowIndicesString); + int length = rowIndices.length(); + + JSONWriter writer = new JSONWriter(response.getWriter()); + writer.object(); + + try { + Evaluable eval = MetaParser.parse(expression); + + writer.key("code"); writer.value("ok"); + writer.key("results"); writer.array(); + + Properties bindings = ExpressionUtils.createBindings(project); + for (int i = 0; i < length; i++) { + Object result = null; + + int rowIndex = rowIndices.getInt(i); + if (rowIndex >= 0 && rowIndex < project.rows.size()) { + Row row = project.rows.get(rowIndex); + Cell cell = row.getCell(cellIndex); + + try { + ExpressionUtils.bind(bindings, row, rowIndex, columnName, cell); + result = eval.evaluate(bindings); + + if (repeat) { + for (int r = 0; r < repeatCount && ExpressionUtils.isStorable(result); r++) { + Cell newCell = new Cell((Serializable) result, (cell != null) ? 
cell.recon : null); + ExpressionUtils.bind(bindings, row, rowIndex, columnName, newCell); + + Object newResult = eval.evaluate(bindings); + if (ExpressionUtils.isError(newResult)) { + break; + } else if (ExpressionUtils.sameValue(result, newResult)) { + break; + } else { + result = newResult; + } + } + } + } catch (Exception e) { + // ignore + } + } + + if (result == null) { + writer.value(null); + } else if (ExpressionUtils.isError(result)) { + writer.object(); + writer.key("message"); writer.value(((EvalError) result).message); + writer.endObject(); + } else { + StringBuffer sb = new StringBuffer(); + + writeValue(sb, result, false); + + writer.value(sb.toString()); + } + } + writer.endArray(); + } catch (ParsingException e) { + writer.key("code"); writer.value("error"); + writer.key("type"); writer.value("parser"); + writer.key("message"); writer.value(e.getMessage()); + } catch (Exception e) { + writer.key("code"); writer.value("error"); + writer.key("type"); writer.value("other"); + writer.key("message"); writer.value(e.getMessage()); + } + + writer.endObject(); + } catch (Exception e) { + respondException(response, e); + } + } + + static protected void writeValue(StringBuffer sb, Object v, boolean quote) throws JSONException { + if (ExpressionUtils.isError(v)) { + sb.append("[error: " + ((EvalError) v).message + "]"); + } else { + if (v == null) { + sb.append("null"); + } else { + if (v instanceof WrappedCell) { + sb.append("[object Cell]"); + } else if (v instanceof WrappedRow) { + sb.append("[object Row]"); + } else if (v instanceof JSONObject) { + sb.append(((JSONObject) v).toString()); + } else if (v instanceof JSONArray) { + sb.append(((JSONArray) v).toString()); + } else if (ExpressionUtils.isArray(v)) { + Object[] a = (Object[]) v; + sb.append("[ "); + for (int i = 0; i < a.length; i++) { + if (i > 0) { + sb.append(", "); + } + writeValue(sb, a[i], true); + } + sb.append(" ]"); + } else if (ExpressionUtils.isArrayOrList(v)) { + List list = 
ExpressionUtils.toObjectList(v); + sb.append("[ "); + for (int i = 0; i < list.size(); i++) { + if (i > 0) { + sb.append(", "); + } + writeValue(sb, list.get(i), true); + } + sb.append(" ]"); + } else if (v instanceof HasFields) { + sb.append("[object " + v.getClass().getSimpleName() + "]"); + } else if (v instanceof Calendar) { + Calendar c = (Calendar) v; + + sb.append("[date " + + ParsingUtilities.dateToString(c.getTime()) +"]"); + } else if (v instanceof Date) { + sb.append("[date " + + ParsingUtilities.dateToString((Date) v) +"]"); + } else if (v instanceof String) { + if (quote) { + sb.append(JSONObject.quote((String) v)); + } else { + sb.append((String) v); + } + } else if (v instanceof Double || v instanceof Float) { + Number n = (Number) v; + if (n.doubleValue() - n.longValue() == 0.0) { + sb.append(n.longValue()); + } else { + sb.append(n.doubleValue()); + } + } else { + sb.append(v.toString()); + } + } + } + } +} diff --git a/main/src/com/google/refine/commands/freebase/GuessTypesOfColumnCommand.java b/main/src/com/google/refine/commands/freebase/GuessTypesOfColumnCommand.java new file mode 100644 index 000000000..19731bf4a --- /dev/null +++ b/main/src/com/google/refine/commands/freebase/GuessTypesOfColumnCommand.java @@ -0,0 +1,238 @@ +package com.google.refine.commands.freebase; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import 
com.google.refine.commands.Command; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.ParsingUtilities; + +public class GuessTypesOfColumnCommand extends Command { + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + String columnName = request.getParameter("columnName"); + String serviceUrl = request.getParameter("service"); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + JSONWriter writer = new JSONWriter(response.getWriter()); + writer.object(); + + Column column = project.columnModel.getColumnByName(columnName); + if (column == null) { + writer.key("code"); writer.value("error"); + writer.key("message"); writer.value("No such column"); + } else { + try { + writer.key("code"); writer.value("ok"); + writer.key("types"); writer.array(); + + List typeGroups = guessTypes(project, column, serviceUrl); + for (TypeGroup tg : typeGroups) { + writer.object(); + writer.key("id"); writer.value(tg.id); + writer.key("name"); writer.value(tg.name); + writer.key("score"); writer.value(tg.score); + writer.key("count"); writer.value(tg.count); + writer.endObject(); + } + + writer.endArray(); + } catch (Exception e) { + writer.key("code"); writer.value("error"); + } + } + + writer.endObject(); + } catch (Exception e) { + respondException(response, e); + } + } + + final static int s_sampleSize = 10; + + /** + * Run relevance searches for the first n cells in the given column and + * count the types of the results. Return a sorted list of types, from most + * frequent to least. 
+ * + * @param project + * @param column + * @return + */ + protected List guessTypes(Project project, Column column, String serviceUrl) { + Map map = new HashMap(); + + int cellIndex = column.getCellIndex(); + + List samples = new ArrayList(s_sampleSize); + Set sampleSet = new HashSet(); + + for (Row row : project.rows) { + Object value = row.getCellValue(cellIndex); + if (ExpressionUtils.isNonBlankData(value)) { + String s = value.toString().trim(); + if (!sampleSet.contains(s)) { + samples.add(s); + sampleSet.add(s); + if (samples.size() >= s_sampleSize) { + break; + } + } + } + } + + StringWriter stringWriter = new StringWriter(); + try { + JSONWriter jsonWriter = new JSONWriter(stringWriter); + jsonWriter.object(); + for (int i = 0; i < samples.size(); i++) { + jsonWriter.key("q" + i); + jsonWriter.object(); + + jsonWriter.key("query"); jsonWriter.value(samples.get(i)); + jsonWriter.key("limit"); jsonWriter.value(3); + + jsonWriter.endObject(); + } + jsonWriter.endObject(); + } catch (JSONException e) { + // ignore + } + + String queriesString = stringWriter.toString(); + try { + URL url = new URL(serviceUrl); + URLConnection connection = url.openConnection(); + { + connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + connection.setConnectTimeout(30000); + connection.setDoOutput(true); + + DataOutputStream dos = new DataOutputStream(connection.getOutputStream()); + try { + String body = "queries=" + ParsingUtilities.encode(queriesString); + + dos.writeBytes(body); + } finally { + dos.flush(); + dos.close(); + } + + connection.connect(); + } + + InputStream is = connection.getInputStream(); + try { + String s = ParsingUtilities.inputStreamToString(is); + JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); + + for (int i = 0; i < samples.size(); i++) { + String key = "q" + i; + if (!o.has(key)) { + continue; + } + + JSONObject o2 = o.getJSONObject(key); + if (!(o2.has("result"))) { + continue; + } + + JSONArray results 
= o2.getJSONArray("result"); + int count = results.length(); + + for (int j = 0; j < count; j++) { + JSONObject result = results.getJSONObject(j); + double score = 1.0 / (1 + j); // score by each result's rank + + JSONArray types = result.getJSONArray("type"); + int typeCount = types.length(); + + for (int t = 0; t < typeCount; t++) { + Object type = types.get(t); + String typeID; + String typeName; + + if (type instanceof String) { + typeID = typeName = (String) type; + } else { + typeID = ((JSONObject) type).getString("id"); + typeName = ((JSONObject) type).getString("name"); + } + + double score2 = score * (typeCount - t) / (double) typeCount; + if (map.containsKey(typeID)) { + TypeGroup tg = map.get(typeID); + tg.score += score2; + tg.count++; + } else { + map.put(typeID, new TypeGroup(typeID, typeName, score2)); + } + } + } + } + } finally { + is.close(); + } + } catch (Exception e) { + logger.error("Failed to guess cell types for load\n" + queriesString, e); + } + + List types = new ArrayList(map.values()); + Collections.sort(types, new Comparator() { + public int compare(TypeGroup o1, TypeGroup o2) { + int c = Math.min(s_sampleSize, o2.count) - Math.min(s_sampleSize, o1.count); + if (c != 0) { + return c; + } + return (int) Math.signum(o2.score / o2.count - o1.score / o1.count); + } + }); + + return types; + } + + static protected class TypeGroup { + String id; + String name; + int count; + double score; + + TypeGroup(String id, String name, double score) { + this.id = id; + this.name = name; + this.score = score; + this.count = 1; + } + } +} diff --git a/main/src/com/google/refine/commands/freebase/ImportQADataCommand.java b/main/src/com/google/refine/commands/freebase/ImportQADataCommand.java new file mode 100644 index 000000000..90462d485 --- /dev/null +++ b/main/src/com/google/refine/commands/freebase/ImportQADataCommand.java @@ -0,0 +1,36 @@ +package com.google.refine.commands.freebase; + +import java.io.IOException; +import java.util.Properties; + 
+import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.ProjectManager; +import com.google.refine.commands.Command; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.recon.ImportQADataOperation; +import com.google.refine.process.Process; + +public class ImportQADataCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + ProjectManager.singleton.setBusy(true); + try { + Project project = getProject(request); + + AbstractOperation op = new ImportQADataOperation(); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } finally { + ProjectManager.singleton.setBusy(false); + } + } +} diff --git a/main/src/com/google/refine/commands/freebase/MQLReadCommand.java b/main/src/com/google/refine/commands/freebase/MQLReadCommand.java new file mode 100644 index 000000000..29fcd78aa --- /dev/null +++ b/main/src/com/google/refine/commands/freebase/MQLReadCommand.java @@ -0,0 +1,31 @@ +package com.google.refine.commands.freebase; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.oauth.OAuthUtilities; +import com.google.refine.oauth.Provider; +import com.google.refine.util.FreebaseUtils; + +public class MQLReadCommand extends Command { + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Provider provider = OAuthUtilities.getProvider(request); + 
response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + String query = request.getParameter("query"); + String result = FreebaseUtils.mqlread(provider,query); + response.getWriter().write(result); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/freebase/MQLWriteCommand.java b/main/src/com/google/refine/commands/freebase/MQLWriteCommand.java new file mode 100644 index 000000000..76a5c1143 --- /dev/null +++ b/main/src/com/google/refine/commands/freebase/MQLWriteCommand.java @@ -0,0 +1,40 @@ +package com.google.refine.commands.freebase; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.oauth.Credentials; +import com.google.refine.oauth.OAuthUtilities; +import com.google.refine.oauth.Provider; +import com.google.refine.util.FreebaseUtils; + +public class MQLWriteCommand extends Command { + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Provider provider = OAuthUtilities.getProvider(request); + + Credentials access_credentials = Credentials.getCredentials(request, provider, Credentials.Type.ACCESS); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + if (access_credentials != null) { + String query = request.getParameter("query"); + String result = FreebaseUtils.mqlwrite(access_credentials, provider, query); + response.getWriter().write(result); + } else { + respond(response, "401 Unauthorized", "You don't have the right credentials"); + } + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/freebase/PreviewProtographCommand.java 
b/main/src/com/google/refine/commands/freebase/PreviewProtographCommand.java new file mode 100644 index 000000000..b5824dc22 --- /dev/null +++ b/main/src/com/google/refine/commands/freebase/PreviewProtographCommand.java @@ -0,0 +1,71 @@ +package com.google.refine.commands.freebase; + +import java.io.IOException; +import java.io.StringWriter; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONObject; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.commands.Command; +import com.google.refine.model.Project; +import com.google.refine.protograph.Protograph; +import com.google.refine.protograph.transpose.MqlwriteLikeTransposedNodeFactory; +import com.google.refine.protograph.transpose.Transposer; +import com.google.refine.protograph.transpose.TripleLoaderTransposedNodeFactory; +import com.google.refine.util.ParsingUtilities; + +public class PreviewProtographCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + Engine engine = getEngine(request, project); + FilteredRows filteredRows = engine.getAllFilteredRows(); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + String jsonString = request.getParameter("protograph"); + JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString); + Protograph protograph = Protograph.reconstruct(json); + + StringBuffer sb = new StringBuffer(2048); + sb.append("{ "); + + { + StringWriter stringWriter = new StringWriter(); + TripleLoaderTransposedNodeFactory nodeFactory = new TripleLoaderTransposedNodeFactory(project, stringWriter); + + Transposer.transpose(project, filteredRows, protograph, protograph.getRootNode(0), nodeFactory); + 
nodeFactory.flush(); + + sb.append("\"tripleloader\" : "); + sb.append(JSONObject.quote(stringWriter.toString())); + } + + { + StringWriter stringWriter = new StringWriter(); + MqlwriteLikeTransposedNodeFactory nodeFactory = new MqlwriteLikeTransposedNodeFactory(stringWriter); + + Transposer.transpose(project, filteredRows, protograph, protograph.getRootNode(0), nodeFactory); + nodeFactory.flush(); + + sb.append(", \"mqllike\" : "); + sb.append(stringWriter.toString()); + } + + sb.append(" }"); + + respond(response, sb.toString()); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/freebase/SaveProtographCommand.java b/main/src/com/google/refine/commands/freebase/SaveProtographCommand.java new file mode 100644 index 000000000..f7d04f6ff --- /dev/null +++ b/main/src/com/google/refine/commands/freebase/SaveProtographCommand.java @@ -0,0 +1,40 @@ +package com.google.refine.commands.freebase; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONObject; + +import com.google.refine.commands.Command; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.SaveProtographOperation; +import com.google.refine.process.Process; +import com.google.refine.protograph.Protograph; +import com.google.refine.util.ParsingUtilities; + +public class SaveProtographCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + String jsonString = request.getParameter("protograph"); + JSONObject json = ParsingUtilities.evaluateJsonStringToObject(jsonString); + Protograph protograph = Protograph.reconstruct(json); + + AbstractOperation op = 
new SaveProtographOperation(protograph); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/freebase/UploadDataCommand.java b/main/src/com/google/refine/commands/freebase/UploadDataCommand.java new file mode 100644 index 000000000..b4bef15e6 --- /dev/null +++ b/main/src/com/google/refine/commands/freebase/UploadDataCommand.java @@ -0,0 +1,91 @@ +package com.google.refine.commands.freebase; + +import java.io.IOException; +import java.io.StringWriter; +import java.net.URL; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.ProjectManager; +import com.google.refine.browsing.Engine; +import com.google.refine.commands.Command; +import com.google.refine.exporters.ProtographTransposeExporter.TripleLoaderExporter; +import com.google.refine.model.Project; +import com.google.refine.preference.PreferenceStore; +import com.google.refine.util.FreebaseUtils; +import com.google.refine.util.ParsingUtilities; + +public class UploadDataCommand extends Command { + final static public String s_dataLoadJobIDPref = "freebase.load.jobID"; + final static public String s_dataLoadJobNamePref = "freebase.load.jobName"; + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + ProjectManager.singleton.setBusy(true); + try { + Project project = getProject(request); + Engine engine = getEngine(request, project); + PreferenceStore preferenceStore = project.getMetadata().getPreferenceStore(); + + TripleLoaderExporter exporter = new TripleLoaderExporter(); + StringWriter triples = new StringWriter(10 * 1024 * 1024); + 
exporter.export(project, new Properties(), engine, triples); + + String source_name = request.getParameter("source_name"); + String source_id = request.getParameter("source_id"); + String graph = request.getParameter("graph"); + String mdo_id = null; + + preferenceStore.put(s_dataLoadJobNamePref, source_name); + + try { + Integer jobID = (Integer) preferenceStore.get(s_dataLoadJobIDPref); + if (jobID != null) { + URL url = new URL("http://gridworks-loads.freebaseapps.com/job_id_to_mdo?job=" + jobID); + String s = ParsingUtilities.inputStreamToString(url.openConnection().getInputStream()); + + if (!s.equals("null")) { + mdo_id = s; + } + } + } catch (Exception e) { + // ignore + } + + String uploadResponse = FreebaseUtils.uploadTriples( + request, graph, source_name, source_id, mdo_id, triples.toString()); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + try { + JSONObject obj = new JSONObject(uploadResponse); + if (obj.has("result") && !obj.isNull("result")) { + JSONObject result = obj.getJSONObject("result"); + if (result.has("job_id") && !result.isNull("job_id")) { + Integer jobID = result.getInt("job_id"); + + project.getMetadata().getPreferenceStore().put( + s_dataLoadJobIDPref, jobID); + } + } + response.getWriter().write(uploadResponse); + } catch (JSONException e) { + respond(response,"500 Error", uploadResponse); + } + + } catch (Exception e) { + respondException(response, e); + } finally { + ProjectManager.singleton.setBusy(false); + } + } +} diff --git a/main/src/com/google/refine/commands/history/ApplyOperationsCommand.java b/main/src/com/google/refine/commands/history/ApplyOperationsCommand.java new file mode 100644 index 000000000..6e0069fdc --- /dev/null +++ b/main/src/com/google/refine/commands/history/ApplyOperationsCommand.java @@ -0,0 +1,60 @@ +package com.google.refine.commands.history; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; 
+import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.commands.Command; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.process.Process; +import com.google.refine.util.ParsingUtilities; + +public class ApplyOperationsCommand extends Command { + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + Project project = getProject(request); + String jsonString = request.getParameter("operations"); + try { + JSONArray a = ParsingUtilities.evaluateJsonStringToArray(jsonString); + int count = a.length(); + for (int i = 0; i < count; i++) { + JSONObject obj = a.getJSONObject(i); + + reconstructOperation(project, obj); + } + + if (project.processManager.hasPending()) { + respond(response, "{ \"code\" : \"pending\" }"); + } else { + respond(response, "{ \"code\" : \"ok\" }"); + } + } catch (JSONException e) { + respondException(response, e); + } + } + + protected void reconstructOperation(Project project, JSONObject obj) { + AbstractOperation operation = OperationRegistry.reconstruct(project, obj); + if (operation != null) { + try { + Process process = operation.createProcess(project, new Properties()); + + project.processManager.queueProcess(process); + } catch (Exception e) { + e.printStackTrace(); + } + } + } +} diff --git a/main/src/com/google/refine/commands/history/CancelProcessesCommand.java b/main/src/com/google/refine/commands/history/CancelProcessesCommand.java new file mode 100644 index 000000000..6acd7d522 --- /dev/null +++ b/main/src/com/google/refine/commands/history/CancelProcessesCommand.java @@ -0,0 +1,31 @@ +package com.google.refine.commands.history; + +import java.io.IOException; + +import 
javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.model.Project; + +public class CancelProcessesCommand extends Command { + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + if( request == null ) throw new IllegalArgumentException("parameter 'request' should not be null"); + if( response == null ) throw new IllegalArgumentException("parameter 'request' should not be null"); + + try { + Project project = getProject(request); + project.getProcessManager().cancelAll(); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + response.getWriter().write("{ \"code\" : \"ok\" }"); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/history/GetHistoryCommand.java b/main/src/com/google/refine/commands/history/GetHistoryCommand.java new file mode 100644 index 000000000..3ce197f54 --- /dev/null +++ b/main/src/com/google/refine/commands/history/GetHistoryCommand.java @@ -0,0 +1,27 @@ +package com.google.refine.commands.history; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; + +import com.google.refine.commands.Command; +import com.google.refine.model.Project; + +public class GetHistoryCommand extends Command { + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + Project project = getProject(request); + try { + respondJSON(response, project.history); + } catch (JSONException e) { + respondException(response, e); + } + } + +} diff --git 
a/main/src/com/google/refine/commands/history/GetOperationsCommand.java b/main/src/com/google/refine/commands/history/GetOperationsCommand.java new file mode 100644 index 000000000..bd288770b --- /dev/null +++ b/main/src/com/google/refine/commands/history/GetOperationsCommand.java @@ -0,0 +1,50 @@ +package com.google.refine.commands.history; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.commands.Command; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.Project; + +public class GetOperationsCommand extends Command { + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + Project project = getProject(request); + + try { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + Properties options = new Properties(); + JSONWriter writer = new JSONWriter(response.getWriter()); + + writer.object(); + writer.key("entries"); writer.array(); + + for (HistoryEntry entry : project.history.getLastPastEntries(-1)) { + writer.object(); + writer.key("description"); writer.value(entry.description); + if (entry.operation != null) { + writer.key("operation"); + entry.operation.write(writer, options); + } + writer.endObject(); + } + writer.endArray(); + writer.endObject(); + } catch (JSONException e) { + respondException(response, e); + } + } + +} diff --git a/main/src/com/google/refine/commands/history/GetProcessesCommand.java b/main/src/com/google/refine/commands/history/GetProcessesCommand.java new file mode 100644 index 000000000..12e5ebb62 --- /dev/null +++ b/main/src/com/google/refine/commands/history/GetProcessesCommand.java @@ -0,0 +1,27 @@ +package com.google.refine.commands.history; 
+ +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; + +import com.google.refine.commands.Command; +import com.google.refine.model.Project; + +public class GetProcessesCommand extends Command { + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + Project project = getProject(request); + + try { + respondJSON(response, project.processManager); + } catch (JSONException e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/history/UndoRedoCommand.java b/main/src/com/google/refine/commands/history/UndoRedoCommand.java new file mode 100644 index 000000000..2c36f17f5 --- /dev/null +++ b/main/src/com/google/refine/commands/history/UndoRedoCommand.java @@ -0,0 +1,44 @@ +package com.google.refine.commands.history; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.history.HistoryProcess; +import com.google.refine.model.Project; + +public class UndoRedoCommand extends Command { + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + Project project = getProject(request); + + long lastDoneID = -1; + String lastDoneIDString = request.getParameter("lastDoneID"); + if (lastDoneIDString != null) { + lastDoneID = Long.parseLong(lastDoneIDString); + } else { + String undoIDString = request.getParameter("undoID"); + if (undoIDString != null) { + long undoID = Long.parseLong(undoIDString); + + lastDoneID = project.history.getPrecedingEntryID(undoID); + } + } + + try { + boolean done = lastDoneID == -1 || + project.processManager.queueProcess( + new 
HistoryProcess(project, lastDoneID)); + + respond(response, "{ \"code\" : " + (done ? "\"ok\"" : "\"pending\"") + " }"); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/project/CreateProjectCommand.java b/main/src/com/google/refine/commands/project/CreateProjectCommand.java new file mode 100644 index 000000000..8874e169d --- /dev/null +++ b/main/src/com/google/refine/commands/project/CreateProjectCommand.java @@ -0,0 +1,459 @@ +package com.google.refine.commands.project; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.Serializable; +import java.io.UnsupportedEncodingException; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Map.Entry; +import java.util.zip.GZIPInputStream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.fileupload.FileItemIterator; +import org.apache.commons.fileupload.FileItemStream; +import org.apache.commons.fileupload.servlet.ServletFileUpload; +import org.apache.commons.fileupload.util.Streams; +import org.apache.tools.bzip2.CBZip2InputStream; +import org.apache.tools.tar.TarEntry; +import org.apache.tools.tar.TarInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.commands.Command; +import 
com.google.refine.importers.Importer; +import com.google.refine.importers.ImporterRegistry; +import com.google.refine.importers.ReaderImporter; +import com.google.refine.importers.StreamImporter; +import com.google.refine.importers.TsvCsvImporter; +import com.google.refine.importers.UrlImporter; +import com.google.refine.model.Project; +import com.google.refine.util.IOUtils; +import com.google.refine.util.ParsingUtilities; +import com.ibm.icu.text.CharsetDetector; +import com.ibm.icu.text.CharsetMatch; + +public class CreateProjectCommand extends Command { + + final static Logger logger = LoggerFactory.getLogger("create-project_command"); + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + ProjectManager.singleton.setBusy(true); + try { + /* + * The uploaded file is in the POST body as a "file part". If + * we call request.getParameter() then the POST body will get + * read and we won't have a chance to parse the body ourselves. + * This is why we have to parse the URL for parameters ourselves. + * Don't call request.getParameter() before calling internalImport(). + */ + Properties options = ParsingUtilities.parseUrlParameters(request); + + Project project = new Project(); + ProjectMetadata pm = new ProjectMetadata(); + + internalImport(request, project, pm, options); + + /* + * The import process above populates options with parameters + * in the POST body. That's why we're constructing the project + * metadata object after calling internalImport(). 
+ */ + pm.setName(options.getProperty("project-name")); + pm.setPassword(options.getProperty("project-password")); + pm.setEncoding(options.getProperty("encoding")); + pm.setEncodingConfidence(options.getProperty("encoding_confidence")); + ProjectManager.singleton.registerProject(project, pm); + + project.update(); + + redirect(response, "/project?project=" + project.id); + } catch (Exception e) { + redirect(response, "/error.html?redirect=index.html&msg=" + + ParsingUtilities.encode("Failed to import file: " + e.getLocalizedMessage()) + ); + e.printStackTrace(); + } finally { + ProjectManager.singleton.setBusy(false); + } + } + + protected void internalImport( + HttpServletRequest request, + Project project, + ProjectMetadata metadata, + Properties options + ) throws Exception { + + ServletFileUpload upload = new ServletFileUpload(); + String url = options.getProperty("url"); + boolean imported = false; + + FileItemIterator iter = upload.getItemIterator(request); + while (iter.hasNext()) { + FileItemStream item = iter.next(); + String name = item.getFieldName().toLowerCase(); + InputStream stream = item.openStream(); + if (item.isFormField()) { + if (name.equals("raw-text")) { + Reader reader = new InputStreamReader(stream,"UTF-8"); + try { + internalInvokeImporter(project, new TsvCsvImporter(), metadata, options, reader); + imported = true; + } finally { + reader.close(); + } + } else if (name.equals("project-url")) { + url = Streams.asString(stream); + } else { + options.put(name, Streams.asString(stream)); + } + } else { + String fileName = item.getName().toLowerCase(); + if (fileName.length() > 0) { + try { + internalImportFile(project, metadata, options, fileName, stream); + imported = true; + } finally { + stream.close(); + } + } + } + } + + if (!imported && url != null && url.length() > 0) { + internalImportURL(request, project, metadata, options, url); + } + } + + static class SafeInputStream extends FilterInputStream { + public SafeInputStream(InputStream 
stream) { + super(stream); + } + + @Override + public void close() { + // some libraries attempt to close the input stream while they can't + // read anymore from it... unfortunately this behavior prevents + // the zip input stream from functioning correctly so we just have + // to ignore those close() calls and just close it ourselves + // forcefully later + } + + public void reallyClose() throws IOException { + super.close(); + } + } + + protected void internalImportFile( + Project project, + ProjectMetadata metadata, + Properties options, + String fileName, + InputStream inputStream + ) throws Exception { + + logger.info("Importing '{}'", fileName); + + if (fileName.endsWith(".zip") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz") || fileName.endsWith(".tar.bz2")) { + + // first, save the file on disk, since we need two passes and we might + // not have enough memory to keep it all in there + File file = save(inputStream); + + // in the first pass, gather statistics about what files are in there + // unfortunately, we have to rely on files extensions, which is horrible but + // better than nothing + HashMap ext_map = new HashMap(); + + FileInputStream fis = new FileInputStream(file); + InputStream is = getStream(fileName, fis); + + // NOTE(SM): unfortunately, java.io does not provide any generalized class for + // archive-like input streams so while both TarInputStream and ZipInputStream + // behave precisely the same, there is no polymorphic behavior so we have + // to treat each instance explicitly... 
one of those times you wish you had + // closures + try { + if (is instanceof TarInputStream) { + TarInputStream tis = (TarInputStream) is; + TarEntry te; + while ((te = tis.getNextEntry()) != null) { + if (!te.isDirectory()) { + mapExtension(te.getName(),ext_map); + } + } + } else if (is instanceof ZipInputStream) { + ZipInputStream zis = (ZipInputStream) is; + ZipEntry ze; + while ((ze = zis.getNextEntry()) != null) { + if (!ze.isDirectory()) { + mapExtension(ze.getName(),ext_map); + } + } + } + } finally { + try { + is.close(); + fis.close(); + } catch (IOException e) {} + } + + // sort extensions by how often they appear + List> values = new ArrayList>(ext_map.entrySet()); + Collections.sort(values, new ValuesComparator()); + + if (values.size() == 0) { + throw new RuntimeException("The archive contains no files."); + } + + // this will contain the set of extensions we'll load from the archive + HashSet exts = new HashSet(); + + // find the extension that is most frequent or those who share the highest frequency value + if (values.size() == 1) { + exts.add(values.get(0).getKey()); + } else { + Entry most_frequent = values.get(0); + Entry second_most_frequent = values.get(1); + if (most_frequent.getValue() > second_most_frequent.getValue()) { // we have a winner + exts.add(most_frequent.getKey()); + } else { // multiple extensions have the same frequency + int winning_frequency = most_frequent.getValue(); + for (Entry e : values) { + if (e.getValue() == winning_frequency) { + exts.add(e.getKey()); + } + } + } + } + + logger.info("Most frequent extensions: {}", exts.toString()); + + // second pass, load the data for real + is = getStream(fileName, new FileInputStream(file)); + SafeInputStream sis = new SafeInputStream(is); + try { + if (is instanceof TarInputStream) { + TarInputStream tis = (TarInputStream) is; + TarEntry te; + while ((te = tis.getNextEntry()) != null) { + if (!te.isDirectory()) { + String name = te.getName(); + String ext = 
getExtension(name)[1]; + if (exts.contains(ext)) { + internalImportFile(project, metadata, options, name, sis); + } + } + } + } else if (is instanceof ZipInputStream) { + ZipInputStream zis = (ZipInputStream) is; + ZipEntry ze; + while ((ze = zis.getNextEntry()) != null) { + if (!ze.isDirectory()) { + String name = ze.getName(); + String ext = getExtension(name)[1]; + if (exts.contains(ext)) { + internalImportFile(project, metadata, options, name, sis); + } + } + } + } + } finally { + try { + sis.reallyClose(); + } catch (IOException e) {} + } + + } else if (fileName.endsWith(".gz")) { + internalImportFile(project, metadata, options, getExtension(fileName)[0], new GZIPInputStream(inputStream)); + } else if (fileName.endsWith(".bz2")) { + internalImportFile(project, metadata, options, getExtension(fileName)[0], new CBZip2InputStream(inputStream)); + } else { + load(project, metadata, options, fileName, inputStream); + } + } + + public static class ValuesComparator implements Comparator>, Serializable { + private static final long serialVersionUID = 8845863616149837657L; + + public int compare(Entry o1, Entry o2) { + return o2.getValue() - o1.getValue(); + } + } + + private void load(Project project, ProjectMetadata metadata, Properties options, String fileName, InputStream inputStream) throws Exception { + Importer importer = ImporterRegistry.guessImporter(null, fileName); + internalInvokeImporter(project, importer, metadata, options, inputStream, null); + } + + private File save(InputStream is) throws IOException { + File temp = this.servlet.getTempFile(Long.toString(System.currentTimeMillis())); + temp.deleteOnExit(); + IOUtils.copy(is,temp); + is.close(); + return temp; + } + + private void mapExtension(String name, Map ext_map) { + String ext = getExtension(name)[1]; + if (ext_map.containsKey(ext)) { + ext_map.put(ext, ext_map.get(ext) + 1); + } else { + ext_map.put(ext, 1); + } + } + + private InputStream getStream(String fileName, InputStream is) throws 
IOException { + if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) { + return new TarInputStream(new GZIPInputStream(is)); + } else if (fileName.endsWith(".tar.bz2")) { + return new TarInputStream(new CBZip2InputStream(is)); + } else { + return new ZipInputStream(is); + } + } + + private String[] getExtension(String filename) { + String[] result = new String[2]; + int ext_index = filename.lastIndexOf('.'); + result[0] = (ext_index == -1) ? filename : filename.substring(0,ext_index); + result[1] = (ext_index == -1) ? "" : filename.substring(ext_index + 1); + return result; + } + + protected void internalImportURL( + HttpServletRequest request, + Project project, + ProjectMetadata metadata, + Properties options, + String urlString) throws Exception { + + URL url = new URL(urlString); + URLConnection connection = null; + + // Try for a URL importer first + Importer importer = ImporterRegistry.guessUrlImporter(url); + if (importer instanceof UrlImporter) { + ((UrlImporter) importer).read(url, project, metadata, options); + } else { + // If we couldn't find one, try opening URL and treating as a stream + try { + connection = url.openConnection(); + connection.setConnectTimeout(5000); + connection.connect(); + } catch (Exception e) { + throw new Exception("Cannot connect to " + urlString, e); + } + + InputStream inputStream = null; + try { + inputStream = connection.getInputStream(); + } catch (Exception e) { + throw new Exception("Cannot retrieve content from " + url, e); + } + + try { + String contentType = connection.getContentType(); + int semicolon = contentType.indexOf(';'); + if (semicolon >= 0) { + contentType = contentType.substring(0, semicolon); + } + + importer = ImporterRegistry.guessImporter(contentType, url.getPath()); + + internalInvokeImporter(project, importer, metadata, options, inputStream, connection.getContentEncoding()); + } finally { + inputStream.close(); + } + } + } + + protected void internalInvokeImporter( + Project project, + 
Importer importer, + ProjectMetadata metadata, + Properties options, + InputStream rawInputStream, + String encoding + ) throws Exception { + if (importer instanceof ReaderImporter) { + + BufferedInputStream inputStream = new BufferedInputStream(rawInputStream); + + // NOTE(SM): The ICU4J char detection code requires the input stream to support mark/reset. + // Unfortunately, not all ServletInputStream implementations are marking, so we need do + // this memory-expensive wrapping to make it work. It's far from ideal but I don't have + // a more efficient solution. + byte[] bytes = new byte[1024 * 4]; + inputStream.mark(bytes.length); + inputStream.read(bytes); + inputStream.reset(); + + CharsetDetector detector = new CharsetDetector(); + detector.setDeclaredEncoding("utf8"); // most of the content on the web is encoded in UTF-8 so start with that + + Reader reader = null; + CharsetMatch[] charsetMatches = detector.setText(bytes).detectAll(); + for (CharsetMatch charsetMatch : charsetMatches) { + try { + reader = new InputStreamReader(inputStream, charsetMatch.getName()); + + options.setProperty("encoding", charsetMatch.getName()); + options.setProperty("encoding_confidence", Integer.toString(charsetMatch.getConfidence())); + + logger.info("Best encoding guess: {} [confidence: {}]", charsetMatch.getName(), charsetMatch.getConfidence()); + + break; + } catch (UnsupportedEncodingException e) { + // silent + } + } + + if (reader == null) { // when all else fails + reader = encoding != null ? 
+ new InputStreamReader(inputStream, encoding) : + new InputStreamReader(inputStream); + } + + ((ReaderImporter) importer).read(reader, project, metadata, options); + } else { + ((StreamImporter) importer).read(rawInputStream, project, metadata, options); + } + } + + protected void internalInvokeImporter( + Project project, + ReaderImporter importer, + ProjectMetadata metadata, + Properties options, + Reader reader + ) throws Exception { + importer.read(reader, project, metadata, options); + } + +} diff --git a/main/src/com/google/refine/commands/project/DeleteProjectCommand.java b/main/src/com/google/refine/commands/project/DeleteProjectCommand.java new file mode 100644 index 000000000..b3d4a3ae8 --- /dev/null +++ b/main/src/com/google/refine/commands/project/DeleteProjectCommand.java @@ -0,0 +1,28 @@ +package com.google.refine.commands.project; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.ProjectManager; +import com.google.refine.commands.Command; + +public class DeleteProjectCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + long projectID = Long.parseLong(request.getParameter("project")); + + ProjectManager.singleton.deleteProject(projectID); + + respond(response, "{ \"code\" : \"ok\" }"); + + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/project/ExportProjectCommand.java b/main/src/com/google/refine/commands/project/ExportProjectCommand.java new file mode 100644 index 000000000..80117e5b4 --- /dev/null +++ b/main/src/com/google/refine/commands/project/ExportProjectCommand.java @@ -0,0 +1,59 @@ +package com.google.refine.commands.project; + +import java.io.IOException; +import java.io.OutputStream; +import 
java.util.zip.GZIPOutputStream; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.tools.tar.TarOutputStream; + +import com.google.refine.ProjectManager; +import com.google.refine.commands.Command; +import com.google.refine.model.Project; + +public class ExportProjectCommand extends Command { + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + ProjectManager.singleton.ensureProjectSaved(project.id); + + response.setHeader("Content-Type", "application/x-gzip"); + + OutputStream os = response.getOutputStream(); + try { + gzipTarToOutputStream(project, os); + } finally { + os.close(); + } + } catch (Exception e) { + respondException(response, e); + } + } + + protected void gzipTarToOutputStream(Project project, OutputStream os) throws IOException { + GZIPOutputStream gos = new GZIPOutputStream(os); + try { + tarToOutputStream(project, gos); + } finally { + gos.close(); + } + } + + protected void tarToOutputStream(Project project, OutputStream os) throws IOException { + TarOutputStream tos = new TarOutputStream(os); + try { + ProjectManager.singleton.exportProject(project.id, tos); + } finally { + tos.close(); + } + } + + +} diff --git a/main/src/com/google/refine/commands/project/ExportRowsCommand.java b/main/src/com/google/refine/commands/project/ExportRowsCommand.java new file mode 100644 index 000000000..0ede35eae --- /dev/null +++ b/main/src/com/google/refine/commands/project/ExportRowsCommand.java @@ -0,0 +1,64 @@ +package com.google.refine.commands.project; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Enumeration; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import 
com.google.refine.ProjectManager; +import com.google.refine.browsing.Engine; +import com.google.refine.commands.Command; +import com.google.refine.exporters.CsvExporter; +import com.google.refine.exporters.Exporter; +import com.google.refine.exporters.ExporterRegistry; +import com.google.refine.model.Project; + +public class ExportRowsCommand extends Command { + + @SuppressWarnings("unchecked") + static public Properties getRequestParameters(HttpServletRequest request) { + Properties options = new Properties(); + + Enumeration en = request.getParameterNames(); + while (en.hasMoreElements()) { + String name = en.nextElement(); + options.put(name, request.getParameter(name)); + } + return options; + } + + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + ProjectManager.singleton.setBusy(true); + try { + Project project = getProject(request); + Engine engine = getEngine(request, project); + String format = request.getParameter("format"); + Properties options = getRequestParameters(request); + + Exporter exporter = ExporterRegistry.getExporter(format); + if (exporter == null) { + exporter = new CsvExporter('\t'); + } + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", exporter.getContentType()); + + if (exporter.takeWriter()) { + PrintWriter writer = response.getWriter(); + exporter.export(project, options, engine, writer); + } else { + exporter.export(project, options, engine, response.getOutputStream()); + } + } catch (Exception e) { + respondException(response, e); + } finally { + ProjectManager.singleton.setBusy(false); + } + } +} diff --git a/main/src/com/google/refine/commands/project/GetModelsCommand.java b/main/src/com/google/refine/commands/project/GetModelsCommand.java new file mode 100644 index 000000000..0bb993ff6 --- /dev/null +++ b/main/src/com/google/refine/commands/project/GetModelsCommand.java @@ -0,0 +1,66 @@ +package 
com.google.refine.commands.project; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.commands.Command; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.MetaParser.LanguageInfo; +import com.google.refine.model.OverlayModel; +import com.google.refine.model.Project; + +public class GetModelsCommand extends Command { + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + Project project = getProject(request); + + try { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + Properties options = new Properties(); + JSONWriter writer = new JSONWriter(response.getWriter()); + + writer.object(); + writer.key("columnModel"); project.columnModel.write(writer, options); + writer.key("recordModel"); project.recordModel.write(writer, options); + + writer.key("overlayModels"); writer.object(); + for (String modelName : project.overlayModels.keySet()) { + OverlayModel overlayModel = project.overlayModels.get(modelName); + if (overlayModel != null) { + writer.key(modelName); + + project.overlayModels.get(modelName).write(writer, options); + } + } + writer.endObject(); + + writer.key("scripting"); writer.object(); + for (String languagePrefix : MetaParser.getLanguagePrefixes()) { + LanguageInfo info = MetaParser.getLanguageInfo(languagePrefix); + + writer.key(languagePrefix); + writer.object(); + writer.key("name"); writer.value(info.name); + writer.key("defaultExpression"); writer.value(info.defaultExpression); + writer.endObject(); + } + writer.endObject(); + + writer.endObject(); + } catch (JSONException e) { + respondException(response, e); + } + } + +} diff --git 
a/main/src/com/google/refine/commands/project/GetProjectMetadataCommand.java b/main/src/com/google/refine/commands/project/GetProjectMetadataCommand.java new file mode 100644 index 000000000..153d57f63 --- /dev/null +++ b/main/src/com/google/refine/commands/project/GetProjectMetadataCommand.java @@ -0,0 +1,28 @@ +package com.google.refine.commands.project; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; + +import com.google.refine.ProjectManager; +import com.google.refine.commands.Command; +import com.google.refine.model.Project; + +public class GetProjectMetadataCommand extends Command { + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + Project project = getProject(request); + + try { + respondJSON(response, ProjectManager.singleton.getProjectMetadata(project.id)); + } catch (JSONException e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/project/ImportProjectCommand.java b/main/src/com/google/refine/commands/project/ImportProjectCommand.java new file mode 100644 index 000000000..a4598685f --- /dev/null +++ b/main/src/com/google/refine/commands/project/ImportProjectCommand.java @@ -0,0 +1,135 @@ +package com.google.refine.commands.project; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.net.URLConnection; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.fileupload.FileItemIterator; +import org.apache.commons.fileupload.FileItemStream; +import org.apache.commons.fileupload.servlet.ServletFileUpload; +import org.apache.commons.fileupload.util.Streams; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.commands.Command; +import com.google.refine.model.Project; +import com.google.refine.util.ParsingUtilities; + +public class ImportProjectCommand extends Command { + + final static Logger logger = LoggerFactory.getLogger("import-project_command"); + + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + ProjectManager.singleton.setBusy(true); + try { + Properties options = ParsingUtilities.parseUrlParameters(request); + + long projectID = Project.generateID(); + logger.info("Importing existing project using new ID {}", projectID); + + internalImport(request, options, projectID); + + ProjectManager.singleton.loadProjectMetadata(projectID); + + ProjectMetadata pm = ProjectManager.singleton.getProjectMetadata(projectID); + if (pm != null) { + if (options.containsKey("project-name")) { + String projectName = options.getProperty("project-name"); + if (projectName != null && projectName.length() > 0) { + pm.setName(projectName); + } + } + + redirect(response, "/project?project=" + projectID); + } else { + redirect(response, "/error.html?redirect=index&msg=" + + ParsingUtilities.encode("Failed to import project") + ); + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + ProjectManager.singleton.setBusy(false); + } + } + + protected void internalImport( + HttpServletRequest request, + Properties options, + long projectID + ) throws Exception { + + String url = null; + + ServletFileUpload upload = new ServletFileUpload(); + + FileItemIterator iter = upload.getItemIterator(request); + while (iter.hasNext()) { + FileItemStream item = iter.next(); + String name = item.getFieldName().toLowerCase(); + InputStream stream = item.openStream(); + if (item.isFormField()) { + if (name.equals("url")) { + url = Streams.asString(stream); + } else { + 
options.put(name, Streams.asString(stream)); + } + } else { + String fileName = item.getName().toLowerCase(); + try { + ProjectManager.singleton.importProject(projectID, stream, !fileName.endsWith(".tar")); + } finally { + stream.close(); + } + } + } + + if (url != null && url.length() > 0) { + internalImportURL(request, options, projectID, url); + } + } + + protected void internalImportURL( + HttpServletRequest request, + Properties options, + long projectID, + String urlString + ) throws Exception { + URL url = new URL(urlString); + URLConnection connection = null; + + try { + connection = url.openConnection(); + connection.setConnectTimeout(5000); + connection.connect(); + } catch (Exception e) { + throw new Exception("Cannot connect to " + urlString, e); + } + + InputStream inputStream = null; + try { + inputStream = connection.getInputStream(); + } catch (Exception e) { + throw new Exception("Cannot retrieve content from " + url, e); + } + + try { + ProjectManager.singleton.importProject(projectID, inputStream, !urlString.endsWith(".tar")); + } finally { + inputStream.close(); + } + } + + +} diff --git a/main/src/com/google/refine/commands/project/RenameProjectCommand.java b/main/src/com/google/refine/commands/project/RenameProjectCommand.java new file mode 100644 index 000000000..4aa21fd8f --- /dev/null +++ b/main/src/com/google/refine/commands/project/RenameProjectCommand.java @@ -0,0 +1,28 @@ +package com.google.refine.commands.project; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.ProjectMetadata; +import com.google.refine.commands.Command; + +public class RenameProjectCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + String name = request.getParameter("name"); + ProjectMetadata pm = 
getProjectMetadata(request); + + pm.setName(name); + + respond(response, "{ \"code\" : \"ok\" }"); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/recon/ReconDiscardJudgmentsCommand.java b/main/src/com/google/refine/commands/recon/ReconDiscardJudgmentsCommand.java new file mode 100644 index 000000000..4065e29d1 --- /dev/null +++ b/main/src/com/google/refine/commands/recon/ReconDiscardJudgmentsCommand.java @@ -0,0 +1,21 @@ +package com.google.refine.commands.recon; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.recon.ReconDiscardJudgmentsOperation; + +public class ReconDiscardJudgmentsCommand extends EngineDependentCommand { + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String columnName = request.getParameter("columnName"); + + return new ReconDiscardJudgmentsOperation(engineConfig, columnName); + } +} diff --git a/main/src/com/google/refine/commands/recon/ReconJudgeOneCellCommand.java b/main/src/com/google/refine/commands/recon/ReconJudgeOneCellCommand.java new file mode 100644 index 000000000..cd78528da --- /dev/null +++ b/main/src/com/google/refine/commands/recon/ReconJudgeOneCellCommand.java @@ -0,0 +1,214 @@ +package com.google.refine.commands.recon; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONWriter; + +import com.google.refine.commands.Command; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import 
com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.ReconStats; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.model.changes.CellChange; +import com.google.refine.model.changes.ReconChange; +import com.google.refine.process.QuickHistoryEntryProcess; +import com.google.refine.util.Pool; + +public class ReconJudgeOneCellCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + int rowIndex = Integer.parseInt(request.getParameter("row")); + int cellIndex = Integer.parseInt(request.getParameter("cell")); + Judgment judgment = Recon.stringToJudgment(request.getParameter("judgment")); + + ReconCandidate match = null; + String id = request.getParameter("id"); + if (id != null) { + String scoreString = request.getParameter("score"); + + match = new ReconCandidate( + id, + request.getParameter("name"), + request.getParameter("types").split(","), + scoreString != null ? Double.parseDouble(scoreString) : 100 + ); + } + + JudgeOneCellProcess process = new JudgeOneCellProcess( + project, + "Judge one cell's recon result", + judgment, + rowIndex, + cellIndex, + match, + request.getParameter("identifierSpace"), + request.getParameter("schemaSpace") + ); + + HistoryEntry historyEntry = project.processManager.queueProcess(process); + if (historyEntry != null) { + /* + * If the process is done, write back the cell's data so that the + * client side can update its UI right away. 
+ */ + JSONWriter writer = new JSONWriter(response.getWriter()); + + Pool pool = new Pool(); + Properties options = new Properties(); + options.put("pool", pool); + + writer.object(); + writer.key("code"); writer.value("ok"); + writer.key("historyEntry"); historyEntry.write(writer, options); + writer.key("cell"); process.newCell.write(writer, options); + writer.key("pool"); pool.write(writer, options); + writer.endObject(); + } else { + respond(response, "{ \"code\" : \"pending\" }"); + } + } catch (Exception e) { + respondException(response, e); + } + } + + protected static class JudgeOneCellProcess extends QuickHistoryEntryProcess { + + final int rowIndex; + final int cellIndex; + final Judgment judgment; + final ReconCandidate match; + final String identifierSpace; + final String schemaSpace; + + Cell newCell; + + JudgeOneCellProcess( + Project project, + String briefDescription, + Judgment judgment, + int rowIndex, + int cellIndex, + ReconCandidate match, + String identifierSpace, + String schemaSpace + ) { + super(project, briefDescription); + + this.judgment = judgment; + this.rowIndex = rowIndex; + this.cellIndex = cellIndex; + this.match = match; + this.identifierSpace = identifierSpace; + this.schemaSpace = schemaSpace; + } + + protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception { + Cell cell = _project.rows.get(rowIndex).getCell(cellIndex); + if (cell == null || !ExpressionUtils.isNonBlankData(cell.value)) { + throw new Exception("Cell is blank or error"); + } + + Column column = _project.columnModel.getColumnByCellIndex(cellIndex); + if (column == null) { + throw new Exception("No such column"); + } + + Judgment oldJudgment = cell.recon == null ? Judgment.None : cell.recon.judgment; + + newCell = new Cell( + cell.value, + cell.recon == null ? 
new Recon(historyEntryID, identifierSpace, schemaSpace) : cell.recon.dup(historyEntryID) + ); + + String cellDescription = + "single cell on row " + (rowIndex + 1) + + ", column " + column.getName() + + ", containing \"" + cell.value + "\""; + + String description = null; + + newCell.recon.matchRank = -1; + newCell.recon.judgmentAction = "single"; + newCell.recon.judgmentBatchSize = 1; + + if (judgment == Judgment.None) { + newCell.recon.judgment = Recon.Judgment.None; + newCell.recon.match = null; + + description = "Discard recon judgment for " + cellDescription; + } else if (judgment == Judgment.New) { + newCell.recon.judgment = Recon.Judgment.New; + newCell.recon.match = null; + + description = "Mark to create new topic for " + cellDescription; + } else { + newCell.recon.judgment = Recon.Judgment.Matched; + newCell.recon.match = this.match; + if (newCell.recon.candidates != null) { + for (int m = 0; m < newCell.recon.candidates.size(); m++) { + if (newCell.recon.candidates.get(m).id.equals(this.match.id)) { + newCell.recon.matchRank = m; + break; + } + } + } + + description = "Match " + this.match.name + + " (" + match.id + ") to " + + cellDescription; + } + + ReconStats stats = column.getReconStats(); + if (stats == null) { + stats = ReconStats.create(_project, cellIndex); + } else { + int newChange = 0; + int matchChange = 0; + + if (oldJudgment == Judgment.New) { + newChange--; + } + if (oldJudgment == Judgment.Matched) { + matchChange--; + } + if (newCell.recon.judgment == Judgment.New) { + newChange++; + } + if (newCell.recon.judgment == Judgment.Matched) { + matchChange++; + } + + stats = new ReconStats( + stats.nonBlanks, + stats.newTopics + newChange, + stats.matchedTopics + matchChange); + } + + Change change = new ReconChange( + new CellChange(rowIndex, cellIndex, cell, newCell), + column.getName(), + column.getReconConfig(), + stats + ); + + return new HistoryEntry( + historyEntryID, _project, description, null, change); + } + } +} diff --git 
a/main/src/com/google/refine/commands/recon/ReconJudgeSimilarCellsCommand.java b/main/src/com/google/refine/commands/recon/ReconJudgeSimilarCellsCommand.java new file mode 100644 index 000000000..d924ddb04 --- /dev/null +++ b/main/src/com/google/refine/commands/recon/ReconJudgeSimilarCellsCommand.java @@ -0,0 +1,49 @@ +package com.google.refine.commands.recon; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.operations.recon.ReconJudgeSimilarCellsOperation; + +public class ReconJudgeSimilarCellsCommand extends EngineDependentCommand { + + @Override + protected AbstractOperation createOperation( + Project project, HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String columnName = request.getParameter("columnName"); + String similarValue = request.getParameter("similarValue"); + Judgment judgment = Recon.stringToJudgment(request.getParameter("judgment")); + + ReconCandidate match = null; + String id = request.getParameter("id"); + if (id != null) { + String scoreString = request.getParameter("score"); + + match = new ReconCandidate( + id, + request.getParameter("name"), + request.getParameter("types").split(","), + scoreString != null ? 
Double.parseDouble(scoreString) : 100 + ); + } + + String shareNewTopics = request.getParameter("shareNewTopics"); + + return new ReconJudgeSimilarCellsOperation( + engineConfig, + columnName, + similarValue, + judgment, + match, + "true".equals(shareNewTopics) + ); + } +} diff --git a/main/src/com/google/refine/commands/recon/ReconMarkNewTopicsCommand.java b/main/src/com/google/refine/commands/recon/ReconMarkNewTopicsCommand.java new file mode 100644 index 000000000..c2dce6c81 --- /dev/null +++ b/main/src/com/google/refine/commands/recon/ReconMarkNewTopicsCommand.java @@ -0,0 +1,24 @@ +package com.google.refine.commands.recon; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.recon.ReconMarkNewTopicsOperation; + +public class ReconMarkNewTopicsCommand extends EngineDependentCommand { + + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + return new ReconMarkNewTopicsOperation( + engineConfig, + request.getParameter("columnName"), + "true".equals(request.getParameter("shareNewTopics")) + ); + } +} diff --git a/main/src/com/google/refine/commands/recon/ReconMatchBestCandidatesCommand.java b/main/src/com/google/refine/commands/recon/ReconMatchBestCandidatesCommand.java new file mode 100644 index 000000000..9152f20dc --- /dev/null +++ b/main/src/com/google/refine/commands/recon/ReconMatchBestCandidatesCommand.java @@ -0,0 +1,22 @@ +package com.google.refine.commands.recon; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import 
com.google.refine.operations.recon.ReconMatchBestCandidatesOperation; + +public class ReconMatchBestCandidatesCommand extends EngineDependentCommand { + + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String columnName = request.getParameter("columnName"); + + return new ReconMatchBestCandidatesOperation(engineConfig, columnName); + } +} diff --git a/main/src/com/google/refine/commands/recon/ReconMatchSpecificTopicCommand.java b/main/src/com/google/refine/commands/recon/ReconMatchSpecificTopicCommand.java new file mode 100644 index 000000000..e6931ac40 --- /dev/null +++ b/main/src/com/google/refine/commands/recon/ReconMatchSpecificTopicCommand.java @@ -0,0 +1,35 @@ +package com.google.refine.commands.recon; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.model.ReconCandidate; +import com.google.refine.operations.recon.ReconMatchSpecificTopicOperation; + +public class ReconMatchSpecificTopicCommand extends EngineDependentCommand { + + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String columnName = request.getParameter("columnName"); + ReconCandidate match = new ReconCandidate( + request.getParameter("topicID"), + request.getParameter("topicName"), + request.getParameter("types").split(","), + 100 + ); + + return new ReconMatchSpecificTopicOperation( + engineConfig, + columnName, + match, + request.getParameter("identifierSpace"), + request.getParameter("schemaSpace") + ); + } +} diff --git a/main/src/com/google/refine/commands/recon/ReconcileCommand.java b/main/src/com/google/refine/commands/recon/ReconcileCommand.java new file mode 100644 index 
000000000..cf29bf8c7 --- /dev/null +++ b/main/src/com/google/refine/commands/recon/ReconcileCommand.java @@ -0,0 +1,28 @@ +package com.google.refine.commands.recon; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; +import org.json.JSONTokener; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.model.recon.ReconConfig; +import com.google.refine.operations.recon.ReconOperation; + +public class ReconcileCommand extends EngineDependentCommand { + + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String columnName = request.getParameter("columnName"); + String configString = request.getParameter("config"); + + JSONTokener t = new JSONTokener(configString); + JSONObject config = (JSONObject) t.nextValue(); + + return new ReconOperation(engineConfig, columnName, ReconConfig.reconstruct(config)); + } +} diff --git a/main/src/com/google/refine/commands/row/AnnotateOneRowCommand.java b/main/src/com/google/refine/commands/row/AnnotateOneRowCommand.java new file mode 100644 index 000000000..b926188eb --- /dev/null +++ b/main/src/com/google/refine/commands/row/AnnotateOneRowCommand.java @@ -0,0 +1,120 @@ +package com.google.refine.commands.row; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.Project; +import com.google.refine.model.changes.RowFlagChange; +import com.google.refine.model.changes.RowStarChange; +import com.google.refine.process.QuickHistoryEntryProcess; + +public class AnnotateOneRowCommand extends Command { + @Override + public void doPost(HttpServletRequest 
request, HttpServletResponse response) + throws ServletException, IOException { + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + try { + Project project = getProject(request); + + int rowIndex = Integer.parseInt(request.getParameter("row")); + + String starredString = request.getParameter("starred"); + if (starredString != null) { + boolean starred = "true".endsWith(starredString); + String description = (starred ? "Star row " : "Unstar row ") + (rowIndex + 1); + + StarOneRowProcess process = new StarOneRowProcess( + project, + description, + rowIndex, + starred + ); + + performProcessAndRespond(request, response, project, process); + return; + } + + String flaggedString = request.getParameter("flagged"); + if (flaggedString != null) { + boolean flagged = "true".endsWith(flaggedString); + String description = (flagged ? "Flag row " : "Unflag row ") + (rowIndex + 1); + + FlagOneRowProcess process = new FlagOneRowProcess( + project, + description, + rowIndex, + flagged + ); + + performProcessAndRespond(request, response, project, process); + return; + } + + respond(response, "{ \"code\" : \"error\", \"message\" : \"invalid command parameters\" }"); + + } catch (Exception e) { + respondException(response, e); + } + } + + protected static class StarOneRowProcess extends QuickHistoryEntryProcess { + final int rowIndex; + final boolean starred; + + StarOneRowProcess( + Project project, + String briefDescription, + int rowIndex, + boolean starred + ) { + super(project, briefDescription); + + this.rowIndex = rowIndex; + this.starred = starred; + } + + protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception { + return new HistoryEntry( + historyEntryID, + _project, + (starred ? 
"Star row " : "Unstar row ") + (rowIndex + 1), + null, + new RowStarChange(rowIndex, starred) + ); + } + } + protected static class FlagOneRowProcess extends QuickHistoryEntryProcess { + final int rowIndex; + final boolean flagged; + + FlagOneRowProcess( + Project project, + String briefDescription, + int rowIndex, + boolean flagged + ) { + super(project, briefDescription); + + this.rowIndex = rowIndex; + this.flagged = flagged; + } + + protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception { + return new HistoryEntry( + historyEntryID, + _project, + (flagged ? "Flag row " : "Unflag row ") + (rowIndex + 1), + null, + new RowFlagChange(rowIndex, flagged) + ); + } + } +} diff --git a/main/src/com/google/refine/commands/row/AnnotateRowsCommand.java b/main/src/com/google/refine/commands/row/AnnotateRowsCommand.java new file mode 100644 index 000000000..4248d1772 --- /dev/null +++ b/main/src/com/google/refine/commands/row/AnnotateRowsCommand.java @@ -0,0 +1,34 @@ +package com.google.refine.commands.row; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.row.RowFlagOperation; +import com.google.refine.operations.row.RowStarOperation; + +public class AnnotateRowsCommand extends EngineDependentCommand { + + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String starredString = request.getParameter("starred"); + if (starredString != null) { + boolean starred = "true".endsWith(starredString); + + return new RowStarOperation(engineConfig, starred); + } + + String flaggedString = request.getParameter("flagged"); + if (flaggedString != null) { + boolean flagged = "true".endsWith(flaggedString); + + return new 
RowFlagOperation(engineConfig, flagged); + } + return null; + } +} diff --git a/main/src/com/google/refine/commands/row/DenormalizeCommand.java b/main/src/com/google/refine/commands/row/DenormalizeCommand.java new file mode 100644 index 000000000..50d531274 --- /dev/null +++ b/main/src/com/google/refine/commands/row/DenormalizeCommand.java @@ -0,0 +1,32 @@ +package com.google.refine.commands.row; + +import java.io.IOException; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.commands.Command; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.row.DenormalizeOperation; +import com.google.refine.process.Process; + +public class DenormalizeCommand extends Command { + @Override + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + + AbstractOperation op = new DenormalizeOperation(); + Process process = op.createProcess(project, new Properties()); + + performProcessAndRespond(request, response, project, process); + } catch (Exception e) { + respondException(response, e); + } + } +} diff --git a/main/src/com/google/refine/commands/row/GetRowsCommand.java b/main/src/com/google/refine/commands/row/GetRowsCommand.java new file mode 100644 index 000000000..0f5c00b5d --- /dev/null +++ b/main/src/com/google/refine/commands/row/GetRowsCommand.java @@ -0,0 +1,208 @@ +package com.google.refine.commands.row; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Properties; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import 
com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRecords; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RecordVisitor; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.browsing.Engine.Mode; +import com.google.refine.commands.Command; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.model.Row; +import com.google.refine.sorting.SortingRecordVisitor; +import com.google.refine.sorting.SortingRowVisitor; +import com.google.refine.util.ParsingUtilities; +import com.google.refine.util.Pool; + +public class GetRowsCommand extends Command { + + public void doPost(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + internalRespond(request, response); + } + + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + internalRespond(request, response); + } + + protected void internalRespond(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + Project project = getProject(request); + Engine engine = getEngine(request, project); + String callback = request.getParameter("callback"); + + int start = Math.min(project.rows.size(), Math.max(0, getIntegerParameter(request, "start", 0))); + int limit = Math.min(project.rows.size() - start, Math.max(0, getIntegerParameter(request, "limit", 20))); + + Pool pool = new Pool(); + Properties options = new Properties(); + options.put("project", project); + options.put("reconCandidateOmitTypes", true); + options.put("pool", pool); + + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", callback == null ? 
"application/json" : "text/javascript"); + + PrintWriter writer = response.getWriter(); + if (callback != null) { + writer.write(callback); + writer.write("("); + } + + JSONWriter jsonWriter = new JSONWriter(writer); + jsonWriter.object(); + + RowWritingVisitor rwv = new RowWritingVisitor(start, limit, jsonWriter, options); + + JSONObject sortingJson = null; + try{ + String json = request.getParameter("sorting"); + sortingJson = (json == null) ? null : + ParsingUtilities.evaluateJsonStringToObject(json); + } catch (JSONException e) { + } + + if (engine.getMode() == Mode.RowBased) { + FilteredRows filteredRows = engine.getAllFilteredRows(); + RowVisitor visitor = rwv; + + if (sortingJson != null) { + SortingRowVisitor srv = new SortingRowVisitor(visitor); + + srv.initializeFromJSON(project, sortingJson); + if (srv.hasCriteria()) { + visitor = srv; + } + } + + jsonWriter.key("mode"); jsonWriter.value("row-based"); + jsonWriter.key("rows"); jsonWriter.array(); + filteredRows.accept(project, visitor); + jsonWriter.endArray(); + jsonWriter.key("filtered"); jsonWriter.value(rwv.total); + jsonWriter.key("total"); jsonWriter.value(project.rows.size()); + } else { + FilteredRecords filteredRecords = engine.getFilteredRecords(); + RecordVisitor visitor = rwv; + + if (sortingJson != null) { + SortingRecordVisitor srv = new SortingRecordVisitor(visitor); + + srv.initializeFromJSON(project, sortingJson); + if (srv.hasCriteria()) { + visitor = srv; + } + } + + jsonWriter.key("mode"); jsonWriter.value("record-based"); + jsonWriter.key("rows"); jsonWriter.array(); + filteredRecords.accept(project, visitor); + jsonWriter.endArray(); + jsonWriter.key("filtered"); jsonWriter.value(rwv.total); + jsonWriter.key("total"); jsonWriter.value(project.recordModel.getRecordCount()); + } + + + jsonWriter.key("start"); jsonWriter.value(start); + jsonWriter.key("limit"); jsonWriter.value(limit); + jsonWriter.key("pool"); pool.write(jsonWriter, options); + + jsonWriter.endObject(); + + if 
(callback != null) { + writer.write(")"); + } + } catch (Exception e) { + respondException(response, e); + } + } + + static protected class RowWritingVisitor implements RowVisitor, RecordVisitor { + final int start; + final int limit; + final JSONWriter writer; + final Properties options; + + public int total; + + public RowWritingVisitor(int start, int limit, JSONWriter writer, Properties options) { + this.start = start; + this.limit = limit; + this.writer = writer; + this.options = options; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + if (total >= start && total < start + limit) { + internalVisit(project, rowIndex, row); + } + total++; + + return false; + } + + @Override + public boolean visit(Project project, Record record) { + if (total >= start && total < start + limit) { + internalVisit(project, record); + } + total++; + + return false; + } + + public boolean internalVisit(Project project, int rowIndex, Row row) { + try { + options.put("rowIndex", rowIndex); + row.write(writer, options); + } catch (JSONException e) { + } + return false; + } + + protected boolean internalVisit(Project project, Record record) { + options.put("recordIndex", record.recordIndex); + + for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { + try { + Row row = project.rows.get(r); + + options.put("rowIndex", r); + + row.write(writer, options); + + } catch (JSONException e) { + } + + options.remove("recordIndex"); + } + return false; + } + } +} diff --git a/main/src/com/google/refine/commands/row/RemoveRowsCommand.java b/main/src/com/google/refine/commands/row/RemoveRowsCommand.java new file mode 100644 index 000000000..1f219f27d --- /dev/null +++ b/main/src/com/google/refine/commands/row/RemoveRowsCommand.java @@ -0,0 +1,20 @@ +package com.google.refine.commands.row; + +import 
javax.servlet.http.HttpServletRequest; + +import org.json.JSONObject; + +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.row.RowRemovalOperation; + +public class RemoveRowsCommand extends EngineDependentCommand { + + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + return new RowRemovalOperation(engineConfig); + } +} diff --git a/main/src/com/google/refine/commands/row/ReorderRowsCommand.java b/main/src/com/google/refine/commands/row/ReorderRowsCommand.java new file mode 100644 index 000000000..c75a1e86c --- /dev/null +++ b/main/src/com/google/refine/commands/row/ReorderRowsCommand.java @@ -0,0 +1,34 @@ +package com.google.refine.commands.row; + +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.browsing.Engine; +import com.google.refine.commands.EngineDependentCommand; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.operations.row.RowReorderOperation; +import com.google.refine.util.ParsingUtilities; + +public class ReorderRowsCommand extends EngineDependentCommand { + + @Override + protected AbstractOperation createOperation(Project project, + HttpServletRequest request, JSONObject engineConfig) throws Exception { + + String mode = request.getParameter("mode"); + JSONObject sorting = null; + + try{ + String json = request.getParameter("sorting"); + + sorting = (json == null) ? 
null : ParsingUtilities.evaluateJsonStringToObject(json); + } catch (JSONException e) { + // ignore + } + + return new RowReorderOperation(Engine.stringToMode(mode), sorting); + } +} diff --git a/main/src/com/google/refine/commands/workspace/GetAllProjectMetadataCommand.java b/main/src/com/google/refine/commands/workspace/GetAllProjectMetadataCommand.java new file mode 100644 index 000000000..3bf30ebd7 --- /dev/null +++ b/main/src/com/google/refine/commands/workspace/GetAllProjectMetadataCommand.java @@ -0,0 +1,48 @@ +package com.google.refine.commands.workspace; + +import java.io.IOException; +import java.util.Map; +import java.util.Properties; +import java.util.Map.Entry; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.commands.Command; + +public class GetAllProjectMetadataCommand extends Command { + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + try { + response.setCharacterEncoding("UTF-8"); + response.setHeader("Content-Type", "application/json"); + + JSONWriter writer = new JSONWriter(response.getWriter()); + Properties options = new Properties(); + + writer.object(); + writer.key("projects"); + writer.object(); + Map m = ProjectManager.singleton.getAllProjectMetadata(); + for (Entry e : m.entrySet()) { + ProjectMetadata pm = e.getValue(); + if (pm != null) { + writer.key(e.getKey().toString()); + e.getValue().write(writer, options); + } + } + writer.endObject(); + writer.endObject(); + } catch (JSONException e) { + respondException(response, e); + } + } +} \ No newline at end of file diff --git a/main/src/com/google/refine/exporters/CsvExporter.java b/main/src/com/google/refine/exporters/CsvExporter.java new file 
mode 100644 index 000000000..a4f2fc2ce --- /dev/null +++ b/main/src/com/google/refine/exporters/CsvExporter.java @@ -0,0 +1,115 @@ +package com.google.refine.exporters; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.Writer; +import java.util.Properties; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import au.com.bytecode.opencsv.CSVWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class CsvExporter implements Exporter{ + + final static Logger logger = LoggerFactory.getLogger("CsvExporter"); + char separator; + + public CsvExporter() { + separator = ','; //Comma separated-value is default + } + + public CsvExporter(char separator) { + this.separator = separator; + } + + @Override + public void export(Project project, Properties options, Engine engine, OutputStream outputStream) + throws IOException { + throw new RuntimeException("Not implemented"); + } + + @Override + public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException { + boolean printColumnHeader = true; + + if (options != null && options.getProperty("printColumnHeader") != null) { + printColumnHeader = Boolean.parseBoolean(options.getProperty("printColumnHeader")); + } + + RowVisitor visitor = new RowVisitor() { + CSVWriter csvWriter; + boolean printColumnHeader = true; + boolean isFirstRow = true; //the first row should also add the column headers + + public RowVisitor init(CSVWriter writer, boolean printColumnHeader) { + this.csvWriter = writer; + this.printColumnHeader = printColumnHeader; + return this; + } + + public boolean visit(Project project, int rowIndex, Row row) { + int size = project.columnModel.columns.size(); + + String[] cols = new String[size]; + String[] vals = new 
String[size]; + + int i = 0; + for (Column col : project.columnModel.columns) { + int cellIndex = col.getCellIndex(); + cols[i] = col.getName(); + + Object value = row.getCellValue(cellIndex); + if (value != null) { + vals[i] = value instanceof String ? (String) value : value.toString(); + } + i++; + } + + if (printColumnHeader && isFirstRow) { + csvWriter.writeNext(cols,false); + isFirstRow = false; //switch off flag + } + csvWriter.writeNext(vals,false); + + return false; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + try { + csvWriter.close(); + } catch (IOException e) { + logger.error("CsvExporter could not close writer : " + e.getMessage()); + } + } + + }.init(new CSVWriter(writer, separator), printColumnHeader); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(project, visitor); + } + + @Override + public String getContentType() { + return "application/x-unknown"; + } + + @Override + public boolean takeWriter() { + return true; + } + +} diff --git a/main/src/com/google/refine/exporters/Exporter.java b/main/src/com/google/refine/exporters/Exporter.java new file mode 100644 index 000000000..f92b32fe4 --- /dev/null +++ b/main/src/com/google/refine/exporters/Exporter.java @@ -0,0 +1,19 @@ +package com.google.refine.exporters; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.Writer; +import java.util.Properties; + +import com.google.refine.browsing.Engine; +import com.google.refine.model.Project; + +public interface Exporter { + public String getContentType(); + + public boolean takeWriter(); + + public void export(Project project, Properties options, Engine engine, OutputStream outputStream) throws IOException; + + public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException; +} diff --git a/main/src/com/google/refine/exporters/ExporterRegistry.java 
b/main/src/com/google/refine/exporters/ExporterRegistry.java new file mode 100644 index 000000000..d9a11e082 --- /dev/null +++ b/main/src/com/google/refine/exporters/ExporterRegistry.java @@ -0,0 +1,31 @@ +package com.google.refine.exporters; + +import java.util.HashMap; +import java.util.Map; + +import com.google.refine.exporters.ProtographTransposeExporter.MqlwriteLikeExporter; +import com.google.refine.exporters.ProtographTransposeExporter.TripleLoaderExporter; + + +abstract public class ExporterRegistry { + static final private Map s_formatToExporter = new HashMap(); + + static { + s_formatToExporter.put("html", new HtmlTableExporter()); + s_formatToExporter.put("xls", new XlsExporter()); + s_formatToExporter.put("csv", new CsvExporter()); + + s_formatToExporter.put("template", new TemplatingExporter()); + + s_formatToExporter.put("tripleloader", new TripleLoaderExporter()); + s_formatToExporter.put("mqlwrite", new MqlwriteLikeExporter()); + } + + static public void registerExporter(String format, Exporter exporter) { + s_formatToExporter.put(format.toLowerCase(), exporter); + } + + static public Exporter getExporter(String format) { + return s_formatToExporter.get(format.toLowerCase()); + } +} diff --git a/main/src/com/google/refine/exporters/HtmlTableExporter.java b/main/src/com/google/refine/exporters/HtmlTableExporter.java new file mode 100644 index 000000000..f44929386 --- /dev/null +++ b/main/src/com/google/refine/exporters/HtmlTableExporter.java @@ -0,0 +1,105 @@ +package com.google.refine.exporters; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.Writer; +import java.util.Properties; + +import com.google.refine.ProjectManager; +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + 
+public class HtmlTableExporter implements Exporter { + public String getContentType() { + return "text/html"; + } + + public boolean takeWriter() { + return true; + } + + public void export(Project project, Properties options, Engine engine, + OutputStream outputStream) throws IOException { + throw new RuntimeException("Not implemented"); + } + + public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException { + writer.write("\n"); + writer.write(""); + writer.write(ProjectManager.singleton.getProjectMetadata(project.id).getName()); + writer.write("\n"); + + writer.write("\n"); + writer.write("\n"); + + writer.write(""); + { + for (Column column : project.columnModel.columns) { + writer.write(""); + } + } + writer.write("\n"); + + { + RowVisitor visitor = new RowVisitor() { + Writer writer; + + public RowVisitor init(Writer writer) { + this.writer = writer; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + try { + writer.write(""); + + for (Column column : project.columnModel.columns) { + writer.write(""); + } + + writer.write("\n"); + } catch (IOException e) { + // ignore + } + return false; + } + }.init(writer); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(project, visitor); + } + + writer.write("
"); + writer.write(column.getName()); + writer.write("
"); + + int cellIndex = column.getCellIndex(); + if (cellIndex < row.cells.size()) { + Cell cell = row.cells.get(cellIndex); + if (cell != null && cell.value != null) { + Object v = cell.value; + writer.write(v instanceof String ? ((String) v) : v.toString()); + } + } + + writer.write("
\n"); + writer.write("\n"); + writer.write("\n"); + } + +} diff --git a/main/src/com/google/refine/exporters/ProtographTransposeExporter.java b/main/src/com/google/refine/exporters/ProtographTransposeExporter.java new file mode 100644 index 000000000..b475dee3d --- /dev/null +++ b/main/src/com/google/refine/exporters/ProtographTransposeExporter.java @@ -0,0 +1,74 @@ +package com.google.refine.exporters; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.Writer; +import java.util.Properties; + +import com.google.refine.browsing.Engine; +import com.google.refine.model.Project; +import com.google.refine.protograph.Protograph; +import com.google.refine.protograph.transpose.MqlwriteLikeTransposedNodeFactory; +import com.google.refine.protograph.transpose.TransposedNodeFactory; +import com.google.refine.protograph.transpose.Transposer; +import com.google.refine.protograph.transpose.TripleLoaderTransposedNodeFactory; + +abstract public class ProtographTransposeExporter implements Exporter { + final protected String _contentType; + + public ProtographTransposeExporter(String contentType) { + _contentType = contentType; + } + + public String getContentType() { + return "application/x-unknown"; + } + + public boolean takeWriter() { + return true; + } + + public void export(Project project, Properties options, Engine engine, + OutputStream outputStream) throws IOException { + throw new RuntimeException("Not implemented"); + } + + public void export(Project project, Properties options, Engine engine, + Writer writer) throws IOException { + + Protograph protograph = (Protograph) project.overlayModels.get("freebaseProtograph"); + if (protograph != null) { + TransposedNodeFactory nodeFactory = createNodeFactory(project, writer); + + Transposer.transpose(project, engine.getAllFilteredRows(), + protograph, protograph.getRootNode(0), nodeFactory, -1); + + nodeFactory.flush(); + } + } + + abstract protected TransposedNodeFactory createNodeFactory(Project 
project, Writer writer); + + static public class TripleLoaderExporter extends ProtographTransposeExporter { + public TripleLoaderExporter() { + super("application/x-unknown"); + } + + @Override + protected TransposedNodeFactory createNodeFactory(Project project, Writer writer) { + return new TripleLoaderTransposedNodeFactory(project, writer); + } + } + + static public class MqlwriteLikeExporter extends ProtographTransposeExporter { + public MqlwriteLikeExporter() { + super("application/x-unknown"); + } + + @Override + protected TransposedNodeFactory createNodeFactory(Project project, Writer writer) { + return new MqlwriteLikeTransposedNodeFactory(writer); + } + } + +} diff --git a/main/src/com/google/refine/exporters/TemplatingExporter.java b/main/src/com/google/refine/exporters/TemplatingExporter.java new file mode 100644 index 000000000..b5c72436d --- /dev/null +++ b/main/src/com/google/refine/exporters/TemplatingExporter.java @@ -0,0 +1,125 @@ +package com.google.refine.exporters; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.StringWriter; +import java.io.Writer; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRecords; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RecordVisitor; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.browsing.Engine.Mode; +import com.google.refine.expr.ParsingException; +import com.google.refine.model.Project; +import com.google.refine.sorting.SortingRecordVisitor; +import com.google.refine.sorting.SortingRowVisitor; +import com.google.refine.templating.Parser; +import com.google.refine.templating.Template; +import com.google.refine.util.ParsingUtilities; + +public class TemplatingExporter implements Exporter { + public String getContentType() { + return "application/x-unknown"; + } + 
+ public boolean takeWriter() { + return true; + } + + public void export(Project project, Properties options, Engine engine, + OutputStream outputStream) throws IOException { + throw new RuntimeException("Not implemented"); + } + + public void export(Project project, Properties options, Engine engine, Writer writer) throws IOException { + String limitString = options.getProperty("limit"); + int limit = limitString != null ? Integer.parseInt(limitString) : -1; + + JSONObject sortingJson = null; + try{ + String json = options.getProperty("sorting"); + sortingJson = (json == null) ? null : + ParsingUtilities.evaluateJsonStringToObject(json); + } catch (JSONException e) { + } + + String templateString = options.getProperty("template"); + String prefixString = options.getProperty("prefix"); + String suffixString = options.getProperty("suffix"); + String separatorString = options.getProperty("separator"); + + Template template; + try { + template = Parser.parse(templateString); + } catch (ParsingException e) { + throw new IOException("Missing or bad template", e); + } + + template.setPrefix(prefixString); + template.setSuffix(suffixString); + template.setSeparator(separatorString); + + if (!"true".equals(options.getProperty("preview"))) { + StringWriter stringWriter = new StringWriter(); + JSONWriter jsonWriter = new JSONWriter(stringWriter); + try { + jsonWriter.object(); + jsonWriter.key("template"); jsonWriter.value(templateString); + jsonWriter.key("prefix"); jsonWriter.value(prefixString); + jsonWriter.key("suffix"); jsonWriter.value(suffixString); + jsonWriter.key("separator"); jsonWriter.value(separatorString); + jsonWriter.endObject(); + } catch (JSONException e) { + // ignore + } + + project.getMetadata().getPreferenceStore().put("exporters.templating.template", stringWriter.toString()); + } + + if (engine.getMode() == Mode.RowBased) { + FilteredRows filteredRows = engine.getAllFilteredRows(); + RowVisitor visitor = template.getRowVisitor(writer, limit); + + if 
(sortingJson != null) { + try { + SortingRowVisitor srv = new SortingRowVisitor(visitor); + srv.initializeFromJSON(project, sortingJson); + + if (srv.hasCriteria()) { + visitor = srv; + } + } catch (JSONException e) { + e.printStackTrace(); + } + } + + filteredRows.accept(project, visitor); + } else { + FilteredRecords filteredRecords = engine.getFilteredRecords(); + RecordVisitor visitor = template.getRecordVisitor(writer, limit); + + if (sortingJson != null) { + try { + SortingRecordVisitor srv = new SortingRecordVisitor(visitor); + srv.initializeFromJSON(project, sortingJson); + + if (srv.hasCriteria()) { + visitor = srv; + } + } catch (JSONException e) { + e.printStackTrace(); + } + } + + filteredRecords.accept(project, visitor); + } + } + +} diff --git a/main/src/com/google/refine/exporters/XlsExporter.java b/main/src/com/google/refine/exporters/XlsExporter.java new file mode 100644 index 000000000..8fb50af17 --- /dev/null +++ b/main/src/com/google/refine/exporters/XlsExporter.java @@ -0,0 +1,126 @@ +package com.google.refine.exporters; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.Writer; +import java.util.Calendar; +import java.util.Date; +import java.util.Properties; + +import org.apache.poi.hssf.usermodel.HSSFHyperlink; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; + +import com.google.refine.ProjectManager; +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class XlsExporter implements Exporter { + public String getContentType() { + return "application/xls"; + } + + public boolean takeWriter() { + return false; + } + + public void export(Project project, Properties options, Engine 
engine, Writer writer) throws IOException { + throw new RuntimeException("Not implemented"); + } + + public void export(Project project, Properties options, Engine engine, + OutputStream outputStream) throws IOException { + + Workbook wb = new HSSFWorkbook(); + Sheet s = wb.createSheet(); + wb.setSheetName(0, ProjectManager.singleton.getProjectMetadata(project.id).getName()); + + int rowCount = 0; + + { + org.apache.poi.ss.usermodel.Row r = s.createRow(rowCount++); + + int cellCount = 0; + for (Column column : project.columnModel.columns) { + org.apache.poi.ss.usermodel.Cell c = r.createCell(cellCount++); + c.setCellValue(column.getName()); + } + } + + { + RowVisitor visitor = new RowVisitor() { + Sheet sheet; + int rowCount; + + public RowVisitor init(Sheet sheet, int rowCount) { + this.sheet = sheet; + this.rowCount = rowCount; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + org.apache.poi.ss.usermodel.Row r = sheet.createRow(rowCount++); + + int cellCount = 0; + for (Column column : project.columnModel.columns) { + org.apache.poi.ss.usermodel.Cell c = r.createCell(cellCount++); + + int cellIndex = column.getCellIndex(); + if (cellIndex < row.cells.size()) { + Cell cell = row.cells.get(cellIndex); + if (cell != null) { + if (cell.recon != null && cell.recon.match != null) { + c.setCellValue(cell.recon.match.name); + + HSSFHyperlink hl = new HSSFHyperlink(HSSFHyperlink.LINK_URL); + hl.setLabel(cell.recon.match.name); + hl.setAddress("http://www.freebase.com/view" + cell.recon.match.id); + + c.setHyperlink(hl); + } else if (cell.value != null) { + Object v = cell.value; + + if (v instanceof Number) { + c.setCellValue(((Number) v).doubleValue()); + } else if (v instanceof Boolean) { + c.setCellValue(((Boolean) v).booleanValue()); + } else if (v instanceof Date) { + 
c.setCellValue((Date) v); + } else if (v instanceof Calendar) { + c.setCellValue((Calendar) v); + } else if (v instanceof String) { + c.setCellValue((String) v); + } + } + } + } + } + return false; + } + }.init(s, rowCount); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(project, visitor); + } + + wb.write(outputStream); + outputStream.flush(); + } + +} diff --git a/main/src/com/google/refine/expr/Binder.java b/main/src/com/google/refine/expr/Binder.java new file mode 100644 index 000000000..43df874db --- /dev/null +++ b/main/src/com/google/refine/expr/Binder.java @@ -0,0 +1,13 @@ +package com.google.refine.expr; + +import java.util.Properties; + +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public interface Binder { + public void initializeBindings(Properties bindings, Project project); + + public void bind(Properties bindings, Row row, int rowIndex, String columnName, Cell cell); +} diff --git a/main/src/com/google/refine/expr/CellTuple.java b/main/src/com/google/refine/expr/CellTuple.java new file mode 100644 index 000000000..dcf1ecf09 --- /dev/null +++ b/main/src/com/google/refine/expr/CellTuple.java @@ -0,0 +1,35 @@ +package com.google.refine.expr; + +import java.util.Properties; + +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class CellTuple implements HasFields { + final public Project project; + final public Row row; + + public CellTuple(Project project, Row row) { + this.project = project; + this.row = row; + } + + public Object getField(String name, Properties bindings) { + Column column = project.columnModel.getColumnByName(name); + if (column != null) { + int cellIndex = column.getCellIndex(); + Cell cell = row.getCell(cellIndex); + + if (cell != null) { + return new WrappedCell(project, name, cell); + } + } + return null; + } + 
+ public boolean fieldAlsoHasFields(String name) { + return true; + } +} \ No newline at end of file diff --git a/main/src/com/google/refine/expr/EvalError.java b/main/src/com/google/refine/expr/EvalError.java new file mode 100644 index 000000000..f1ef19cb6 --- /dev/null +++ b/main/src/com/google/refine/expr/EvalError.java @@ -0,0 +1,39 @@ +package com.google.refine.expr; + +import java.io.Serializable; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; + +/** + * An error that occurs during the evaluation of an Evaluable. Errors are values, too + * because they can be stored in cells just like strings, numbers, etc. Errors are not + * thrown because an error might occupy just one element in an array and doesn't need + * to make the whole array erroneous. + */ +public class EvalError implements Serializable, Jsonizable { + private static final long serialVersionUID = -102681220092874080L; + + final public String message; + + public EvalError(String message) { + this.message = message; + } + + public String toString() { + return this.message; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("type"); writer.value("error"); + writer.key("message"); writer.value(message); + writer.endObject(); + } + +} diff --git a/main/src/com/google/refine/expr/Evaluable.java b/main/src/com/google/refine/expr/Evaluable.java new file mode 100644 index 000000000..c32ac8ac4 --- /dev/null +++ b/main/src/com/google/refine/expr/Evaluable.java @@ -0,0 +1,16 @@ +package com.google.refine.expr; + +import java.util.Properties; + +/** + * Interface for evaluable expressions in any arbitrary language. + */ +public interface Evaluable { + /** + * Evaluate this expression in the given environment (bindings). 
+ * + * @param bindings + * @return + */ + public Object evaluate(Properties bindings); +} diff --git a/main/src/com/google/refine/expr/ExpressionUtils.java b/main/src/com/google/refine/expr/ExpressionUtils.java new file mode 100644 index 000000000..a5233d7a1 --- /dev/null +++ b/main/src/com/google/refine/expr/ExpressionUtils.java @@ -0,0 +1,146 @@ +package com.google.refine.expr; + +import java.io.Serializable; +import java.util.Calendar; +import java.util.Collection; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Properties; +import java.util.Set; + +import org.json.JSONArray; +import org.json.JSONObject; + +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class ExpressionUtils { + static protected Set s_binders = new HashSet(); + + static public void registerBinder(Binder binder) { + s_binders.add(binder); + } + + static public Properties createBindings(Project project) { + Properties bindings = new Properties(); + + bindings.put("true", true); + bindings.put("false", false); + + bindings.put("project", project); + + for (Binder binder : s_binders) { + binder.initializeBindings(bindings, project); + } + + return bindings; + } + + static public void bind(Properties bindings, Row row, int rowIndex, String columnName, Cell cell) { + Project project = (Project) bindings.get("project"); + + bindings.put("rowIndex", rowIndex); + bindings.put("row", new WrappedRow(project, rowIndex, row)); + bindings.put("cells", new CellTuple(project, row)); + + if (columnName != null) { + bindings.put("columnName", columnName); + } + + if (cell == null) { + bindings.remove("cell"); + bindings.remove("value"); + } else { + bindings.put("cell", new WrappedCell(project, columnName, cell)); + if (cell.value == null) { + bindings.remove("value"); + } else { + bindings.put("value", cell.value); + } + } + + for (Binder binder : s_binders) { + binder.bind(bindings, row, 
rowIndex, columnName, cell); + } + } + + static public boolean isError(Object o) { + return o != null && o instanceof EvalError; + } + /* + static public boolean isBlank(Object o) { + return o == null || (o instanceof String && ((String) o).length() == 0); + } + */ + static public boolean isNonBlankData(Object o) { + return + o != null && + !(o instanceof EvalError) && + (!(o instanceof String) || ((String) o).length() > 0); + } + + static public boolean isTrue(Object o) { + return o != null && + (o instanceof Boolean ? + ((Boolean) o).booleanValue() : + Boolean.parseBoolean(o.toString())); + } + + static public boolean sameValue(Object v1, Object v2) { + if (v1 == null) { + return (v2 == null) + || (v2 instanceof String && ((String) v2).length() == 0); + } else if (v2 == null) { + return (v1 == null) + || (v1 instanceof String && ((String) v1).length() == 0); + } else { + return v1.equals(v2); + } + } + + static public boolean isStorable(Object v) { + return v == null || + v instanceof Number || + v instanceof String || + v instanceof Boolean || + v instanceof Date || + v instanceof Calendar || + v instanceof EvalError; + } + + static public Serializable wrapStorable(Object v) { + if (v instanceof JSONArray) { + return ((JSONArray) v).toString(); + } else if (v instanceof JSONObject) { + return ((JSONObject) v).toString(); + } else { + return isStorable(v) ? 
+ (Serializable) v : + new EvalError(v.getClass().getSimpleName() + " value not storable"); + } + } + + static public boolean isArray(Object v) { + return v != null && v.getClass().isArray(); + } + + static public boolean isArrayOrCollection(Object v) { + return v != null && (v.getClass().isArray() || v instanceof Collection); + } + + static public boolean isArrayOrList(Object v) { + return v != null && (v.getClass().isArray() || v instanceof List); + } + + @SuppressWarnings("unchecked") + static public List toObjectList(Object v) { + return (List) v; + } + + @SuppressWarnings("unchecked") + static public Collection toObjectCollection(Object v) { + return (Collection) v; + } +} diff --git a/main/src/com/google/refine/expr/HasFields.java b/main/src/com/google/refine/expr/HasFields.java new file mode 100644 index 000000000..18a4b5303 --- /dev/null +++ b/main/src/com/google/refine/expr/HasFields.java @@ -0,0 +1,13 @@ +package com.google.refine.expr; + +import java.util.Properties; + +/** + * Interface for objects that have named fields, which can be retrieved using the + * dot notation or the bracket notation, e.g., cells.Country, cells["Type of Disaster"]. + */ +public interface HasFields { + public Object getField(String name, Properties bindings); + + public boolean fieldAlsoHasFields(String name); +} diff --git a/main/src/com/google/refine/expr/HasFieldsList.java b/main/src/com/google/refine/expr/HasFieldsList.java new file mode 100644 index 000000000..902bd6a9d --- /dev/null +++ b/main/src/com/google/refine/expr/HasFieldsList.java @@ -0,0 +1,14 @@ +package com.google.refine.expr; + +/** + * Interface for objects each of which is a list of HasFields objects of the + * same kind (e.g., list of cells). Its getField method thus returns either + * another HasFieldsList object or an array or java.util.List of objects. 
+ */ +public interface HasFieldsList extends HasFields { + public int length(); + + public HasFields get(int index); + + public HasFieldsList getSubList(int from, int to); +} diff --git a/main/src/com/google/refine/expr/HasFieldsListImpl.java b/main/src/com/google/refine/expr/HasFieldsListImpl.java new file mode 100644 index 000000000..30c54e8fc --- /dev/null +++ b/main/src/com/google/refine/expr/HasFieldsListImpl.java @@ -0,0 +1,41 @@ +package com.google.refine.expr; + +import java.util.ArrayList; +import java.util.Properties; + +public class HasFieldsListImpl extends ArrayList implements HasFieldsList { + private static final long serialVersionUID = -8635194387420305802L; + + public Object getField(String name, Properties bindings) { + int c = size(); + if (c > 0 && get(0).fieldAlsoHasFields(name)) { + HasFieldsListImpl l = new HasFieldsListImpl(); + for (int i = 0; i < size(); i++) { + l.add(i, (HasFields) this.get(i).getField(name, bindings)); + } + return l; + } else { + Object[] r = new Object[this.size()]; + for (int i = 0; i < r.length; i++) { + r[i] = this.get(i).getField(name, bindings); + } + return r; + } + } + + public int length() { + return size(); + } + + public boolean fieldAlsoHasFields(String name) { + int c = size(); + return (c > 0 && get(0).fieldAlsoHasFields(name)); + } + + public HasFieldsList getSubList(int fromIndex, int toIndex) { + HasFieldsListImpl subList = new HasFieldsListImpl(); + subList.addAll(this.subList(fromIndex, toIndex)); + + return subList; + } +} diff --git a/main/src/com/google/refine/expr/LanguageSpecificParser.java b/main/src/com/google/refine/expr/LanguageSpecificParser.java new file mode 100644 index 000000000..3090cba3e --- /dev/null +++ b/main/src/com/google/refine/expr/LanguageSpecificParser.java @@ -0,0 +1,5 @@ +package com.google.refine.expr; + +public interface LanguageSpecificParser { + public Evaluable parse(String s) throws ParsingException; +} diff --git a/main/src/com/google/refine/expr/MetaParser.java 
b/main/src/com/google/refine/expr/MetaParser.java new file mode 100644 index 000000000..79b912ace --- /dev/null +++ b/main/src/com/google/refine/expr/MetaParser.java @@ -0,0 +1,118 @@ +package com.google.refine.expr; + +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + +import clojure.lang.IFn; + +import com.google.refine.gel.Parser; + +abstract public class MetaParser { + static public class LanguageInfo { + final public String name; + final public LanguageSpecificParser parser; + final public String defaultExpression; + + LanguageInfo(String name, LanguageSpecificParser parser, String defaultExpression) { + this.name = name; + this.parser = parser; + this.defaultExpression = defaultExpression; + } + } + + static protected Map s_languages; + static { + s_languages = new HashMap(); + + registerLanguageParser("gel", "Gridworks Expression Language (GEL)", new LanguageSpecificParser() { + + @Override + public Evaluable parse(String s) throws ParsingException { + return parseGEL(s); + } + }, "value"); + + registerLanguageParser("clojure", "Clojure", new LanguageSpecificParser() { + + @Override + public Evaluable parse(String s) throws ParsingException { + try { + IFn fn = (IFn) clojure.lang.Compiler.load(new StringReader( + "(fn [value cell cells row rowIndex] " + s + ")" + )); + + return new Evaluable() { + private IFn _fn; + + public Evaluable init(IFn fn) { + _fn = fn; + return this; + } + + public Object evaluate(Properties bindings) { + try { + return _fn.invoke( + bindings.get("value"), + bindings.get("cell"), + bindings.get("cells"), + bindings.get("row"), + bindings.get("rowIndex") + ); + } catch (Exception e) { + return new EvalError(e.getMessage()); + } + } + }.init(fn); + } catch (Exception e) { + throw new ParsingException(e.getMessage()); + } + } + }, "value"); + } + + static public void registerLanguageParser(String languagePrefix, String name, LanguageSpecificParser 
parser, String defaultExpression) { + s_languages.put(languagePrefix, new LanguageInfo(name, parser, defaultExpression)); + } + + static public LanguageInfo getLanguageInfo(String languagePrefix) { + return s_languages.get(languagePrefix.toLowerCase()); + } + + static public Set getLanguagePrefixes() { + return s_languages.keySet(); + } + + /** + * Parse an expression that might have a language prefix into an Evaluable. + * Expressions without valid prefixes or without any prefix are assumed to be + * GEL expressions. + * + * @param s + * @return + * @throws ParsingException + */ + static public Evaluable parse(String s) throws ParsingException { + String language = "gel"; + + int colon = s.indexOf(':'); + if (colon >= 0) { + language = s.substring(0, colon); + } + + LanguageInfo info = s_languages.get(language.toLowerCase()); + if (info != null) { + return info.parser.parse(s.substring(colon + 1)); + } else { + return parseGEL(s); + } + } + + static protected Evaluable parseGEL(String s) throws ParsingException { + Parser parser = new Parser(s); + + return parser.getExpression(); + } +} diff --git a/main/src/com/google/refine/expr/ParsingException.java b/main/src/com/google/refine/expr/ParsingException.java new file mode 100644 index 000000000..90bc43360 --- /dev/null +++ b/main/src/com/google/refine/expr/ParsingException.java @@ -0,0 +1,16 @@ +/** + * + */ +package com.google.refine.expr; + +public class ParsingException extends Exception { + private static final long serialVersionUID = 155004505172098755L; + + public ParsingException(String message) { + super(message); + } + + public ParsingException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/main/src/com/google/refine/expr/WrappedCell.java b/main/src/com/google/refine/expr/WrappedCell.java new file mode 100644 index 000000000..01f9ca492 --- /dev/null +++ b/main/src/com/google/refine/expr/WrappedCell.java @@ -0,0 +1,26 @@ +package 
com.google.refine.expr; + +import java.util.Properties; + +import com.google.refine.model.Cell; +import com.google.refine.model.Project; + +public class WrappedCell implements HasFields { + final public Project project; + final public String columnName; + final public Cell cell; + + public WrappedCell(Project project, String columnName, Cell cell) { + this.project = project; + this.columnName = columnName; + this.cell = cell; + } + + public Object getField(String name, Properties bindings) { + return cell.getField(name, bindings); + } + + public boolean fieldAlsoHasFields(String name) { + return cell.fieldAlsoHasFields(name); + } +} diff --git a/main/src/com/google/refine/expr/WrappedRow.java b/main/src/com/google/refine/expr/WrappedRow.java new file mode 100644 index 000000000..431b5ec72 --- /dev/null +++ b/main/src/com/google/refine/expr/WrappedRow.java @@ -0,0 +1,93 @@ +package com.google.refine.expr; + +import java.util.Properties; + +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.model.Row; + +public class WrappedRow implements HasFields { + final public Project project; + final public int rowIndex; + final public Row row; + + public WrappedRow(Project project, int rowIndex, Row row) { + this.project = project; + this.rowIndex = rowIndex; + this.row = row; + } + + public Object getField(String name, Properties bindings) { + if ("cells".equals(name)) { + return new CellTuple(project, row); + } else if ("index".equals(name)) { + return rowIndex; + } else if ("record".equals(name)) { + int rowIndex = (Integer) bindings.get("rowIndex"); + + return new WrappedRecord(project.recordModel.getRecordOfRow(rowIndex)); + } else if ("columnNames".equals(name)) { + Project project = (Project) bindings.get("project"); + + return project.columnModel.getColumnNames(); + } else { + return row.getField(name, bindings); + } + } + + public 
boolean fieldAlsoHasFields(String name) { + return row.fieldAlsoHasFields(name); + } + + protected class WrappedRecord implements HasFields { + final Record _record; + + protected WrappedRecord(Record record) { + _record = record; + } + + public Object getField(String name, Properties bindings) { + if ("cells".equals(name)) { + return new RecordCells(_record); + } + return null; + } + + public boolean fieldAlsoHasFields(String name) { + return "cells".equals(name); + } + } + + protected class RecordCells implements HasFields { + final Record _record; + + protected RecordCells(Record record) { + _record = record; + } + + public Object getField(String name, Properties bindings) { + Column column = project.columnModel.getColumnByName(name); + if (column != null) { + int cellIndex = column.getCellIndex(); + + HasFieldsListImpl cells = new HasFieldsListImpl(); + for (int r = _record.fromRowIndex; r < _record.toRowIndex; r++) { + Row row = project.rows.get(r); + Cell cell = row.getCell(cellIndex); + if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) { + cells.add(new WrappedCell(project, name, cell)); + } + } + + return cells; + } + return null; + } + + public boolean fieldAlsoHasFields(String name) { + return true; + } + } +} diff --git a/main/src/com/google/refine/expr/functions/Cross.java b/main/src/com/google/refine/expr/functions/Cross.java new file mode 100644 index 000000000..e5bdf5a67 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/Cross.java @@ -0,0 +1,53 @@ +package com.google.refine.expr.functions; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.ProjectManager; +import com.google.refine.InterProjectModel.ProjectJoin; +import com.google.refine.expr.EvalError; +import com.google.refine.expr.WrappedCell; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; +import com.google.refine.model.Project; + +public class Cross 
implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 3) { + // from project is implied + + Object wrappedCell = args[0]; // from cell + Object toProjectName = args[1]; + Object toColumnName = args[2]; + + if (wrappedCell != null && wrappedCell instanceof WrappedCell && + toProjectName != null && toProjectName instanceof String && + toColumnName != null && toColumnName instanceof String) { + + ProjectJoin join = ProjectManager.singleton.getInterProjectModel().getJoin( + ProjectManager.singleton.getProjectMetadata( + ((Project) bindings.get("project")).id).getName(), + ((WrappedCell) wrappedCell).columnName, + (String) toProjectName, + (String) toColumnName + ); + + return join.getRows(((WrappedCell) wrappedCell).cell.value); + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a cell, a project name to join with, and a column name in that project"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("TODO"); + writer.key("params"); writer.value("cell c, string projectName, string columnName"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/FacetCount.java b/main/src/com/google/refine/expr/functions/FacetCount.java new file mode 100644 index 000000000..eb6f7565e --- /dev/null +++ b/main/src/com/google/refine/expr/functions/FacetCount.java @@ -0,0 +1,64 @@ +package com.google.refine.expr.functions; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.util.ExpressionNominalValueGrouper; +import com.google.refine.expr.EvalError; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.ParsingException; +import 
com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; +import com.google.refine.model.Column; +import com.google.refine.model.Project; + +public class FacetCount implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 3 && args[1] instanceof String && args[2] instanceof String) { + Object choiceValue = args[0]; // choice value to look up + String facetExpression = (String) args[1]; + String columnName = (String) args[2]; + + Project project = (Project) bindings.get("project"); + Column column = project.columnModel.getColumnByName(columnName); + if (column == null) { + return new EvalError("No such column named " + columnName); + } + + String key = "nominal-bin:" + facetExpression; + ExpressionNominalValueGrouper grouper = (ExpressionNominalValueGrouper) column.getPrecompute(key); + if (grouper == null) { + try { + Evaluable eval = MetaParser.parse(facetExpression); + Engine engine = new Engine(project); + + grouper = new ExpressionNominalValueGrouper(eval, columnName, column.getCellIndex()); + engine.getAllRows().accept(project, grouper); + + column.setPrecompute(key, grouper); + } catch (ParsingException e) { + return new EvalError("Error parsing facet expression " + facetExpression); + } + } + + return grouper.getChoiceValueCountMultiple(choiceValue); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + + " expects a choice value, an expression as a string, and a column name"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the facet count corresponding to the given choice value"); + writer.key("params"); writer.value("choiceValue, string facetExpression, string columnName"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/Get.java 
b/main/src/com/google/refine/expr/functions/Get.java new file mode 100644 index 000000000..63c71ee59 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/Get.java @@ -0,0 +1,147 @@ +package com.google.refine.expr.functions; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.expr.HasFields; +import com.google.refine.expr.HasFieldsList; +import com.google.refine.gel.Function; + +public class Get implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length > 1 && args.length <= 3) { + Object v = args[0]; + Object from = args[1]; + Object to = (args.length == 3) ? args[2] : null; + + if (v != null && from != null) { + if (v instanceof HasFields && from instanceof String) { + return ((HasFields) v).getField((String) from, bindings); + } else if (v instanceof JSONObject && from instanceof String) { + try { + return ((JSONObject) v).get((String) from); + } catch (JSONException e) { + // ignore; will return null + } + } else { + if (from instanceof Number && (to == null || to instanceof Number)) { + if (v.getClass().isArray() || + v instanceof List || + v instanceof HasFieldsList || + v instanceof JSONArray) { + + int length = 0; + if (v.getClass().isArray()) { + length = ((Object[]) v).length; + } else if (v instanceof HasFieldsList) { + length = ((HasFieldsList) v).length(); + } else if (v instanceof JSONArray) { + length = ((JSONArray) v).length(); + } else { + length = ExpressionUtils.toObjectList(v).size(); + } + + int start = ((Number) from).intValue(); + if (start < 0) { + start = length + start; + } + start = Math.min(length, Math.max(0, start)); + + if (to == null) { + if (v.getClass().isArray()) { + return ((Object[]) v)[start]; + } else if (v instanceof HasFieldsList) { + return ((HasFieldsList) v).get(start); + } 
else if (v instanceof JSONArray) { + try { + return ((JSONArray) v).get(start); + } catch (JSONException e) { + // ignore; will return null + } + } else { + return ExpressionUtils.toObjectList(v).get(start); + } + } else { + int end = (to != null) ? ((Number) to).intValue() : length; + + if (end < 0) { + end = length + end; + } + end = Math.min(length, Math.max(start, end)); + + if (end > start) { + if (v.getClass().isArray()) { + Object[] a2 = new Object[end - start]; + + System.arraycopy((Object[]) v, start, a2, 0, end - start); + + return a2; + } else if (v instanceof HasFieldsList) { + return ((HasFieldsList) v).getSubList(start, end); + } else if (v instanceof JSONArray) { + JSONArray a = (JSONArray) v; + Object[] a2 = new Object[end - start]; + + for (int i = 0; i < a2.length; i++) { + try { + a2[i] = a.get(start + i); + } catch (JSONException e) { + // ignore + } + } + + return a2; + } else { + return ExpressionUtils.toObjectList(v).subList(start, end); + } + } + } + } else { + String s = (v instanceof String) ? (String) v : v.toString(); + + int start = ((Number) from).intValue(); + if (start < 0) { + start = s.length() + start; + } + start = Math.min(s.length(), Math.max(0, start)); + + if (to != null) { + int end = ((Number) to).intValue(); + if (end < 0) { + end = s.length() + end; + } + end = Math.min(s.length(), Math.max(start, end)); + + return s.substring(start, end); + } else { + return s.substring(start, start + 1); + } + } + } + } + } + } + return null; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value( + "If o has fields, returns the field named 'from' of o. " + + "If o is an array, returns o[from, to]. 
" + + "if o is a string, returns o.substring(from, to)" + ); + writer.key("params"); writer.value("o, number or string from, optional number to"); + writer.key("returns"); writer.value("Depends on actual arguments"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/HasField.java b/main/src/com/google/refine/expr/functions/HasField.java new file mode 100644 index 000000000..c1be4c896 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/HasField.java @@ -0,0 +1,44 @@ +package com.google.refine.expr.functions; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.expr.HasFields; +import com.google.refine.gel.Function; + +public class HasField implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length > 1 && args.length <= 2) { + Object v = args[0]; + Object f = args[1]; + + if (v != null && f != null && f instanceof String) { + String name = (String) f; + if (v instanceof HasFields) { + return ((HasFields) v).getField(name, bindings) != null; + } else if (v instanceof JSONObject) { + try { + return ((JSONObject) v).get(name) != null; + } catch (JSONException e) { + // ignore; will return false + } + } + } + } + return false; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns whether o has field name"); + writer.key("params"); writer.value("o, string name"); + writer.key("returns"); writer.value("boolean"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/Jsonize.java b/main/src/com/google/refine/expr/functions/Jsonize.java new file mode 100644 index 000000000..b9d80e4c7 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/Jsonize.java @@ -0,0 +1,56 @@ +package com.google.refine.expr.functions; + +import java.util.Collection; +import 
java.util.Map; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class Jsonize implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length >= 1) { + try { + Object o1 = args[0]; + if (o1 == null) { + return "null"; + } else if (o1 instanceof Number) { + return JSONObject.numberToString((Number) o1); + } else if (o1 instanceof Boolean) { + return o1.toString(); + } else if (o1 instanceof JSONObject) { + return ((JSONObject) o1).toString(); + } else if (o1 instanceof JSONArray) { + return ((JSONArray) o1).toString(); + } else if (o1 instanceof Map) { + return new JSONObject((Map) o1).toString(); + } else if (o1 instanceof Collection) { + return new JSONArray((Collection) o1).toString(); + } else if (o1.getClass().isArray()) { + return new JSONArray(o1).toString(); + } else { + return JSONObject.quote(o1.toString()); + } + } catch (JSONException e) { + throw new RuntimeException(e); + } + } + return null; + } + + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Quotes a value as a JSON literal value"); + writer.key("params"); writer.value("value"); + writer.key("returns"); writer.value("JSON literal value"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/Length.java b/main/src/com/google/refine/expr/functions/Length.java new file mode 100644 index 000000000..2b10d6531 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/Length.java @@ -0,0 +1,49 @@ +package com.google.refine.expr.functions; + +import java.util.Collection; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.HasFieldsList; +import 
com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Length implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1) { + Object v = args[0]; + + if (v != null) { + if (v.getClass().isArray()) { + Object[] a = (Object[]) v; + return a.length; + } else if (v instanceof Collection) { + return ((Collection) v).size(); + } else if (v instanceof HasFieldsList) { + return ((HasFieldsList) v).length(); + } else if (v instanceof JSONArray) { + return ((JSONArray) v).length(); + } else { + String s = (v instanceof String ? (String) v : v.toString()); + return s.length(); + } + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array or a string"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the length of o"); + writer.key("params"); writer.value("array or string o"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/Slice.java b/main/src/com/google/refine/expr/functions/Slice.java new file mode 100644 index 000000000..be4bd5e63 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/Slice.java @@ -0,0 +1,110 @@ +package com.google.refine.expr.functions; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.expr.HasFieldsList; +import com.google.refine.gel.Function; + +public class Slice implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length > 1 && args.length <= 3) { + Object v = args[0]; + Object from = args[1]; + Object to = (args.length == 3) ? 
args[2] : null; + + if (v != null && from != null && from instanceof Number && (to == null || to instanceof Number)) { + if (v.getClass().isArray() || v instanceof List || v instanceof HasFieldsList || v instanceof JSONArray) { + int length = 0; + if (v.getClass().isArray()) { + length = ((Object[]) v).length; + } else if (v instanceof HasFieldsList) { + length = ((HasFieldsList) v).length(); + } else if (v instanceof JSONArray) { + length = ((JSONArray) v).length(); + } else { + length = ExpressionUtils.toObjectList(v).size(); + } + + int start = ((Number) from).intValue(); + int end = (to != null) ? ((Number) to).intValue() : length; + + if (start < 0) { + start = length + start; + } + start = Math.min(length, Math.max(0, start)); + + if (end < 0) { + end = length + end; + } + end = Math.min(length, Math.max(start, end)); + + if (v.getClass().isArray()) { + Object[] a2 = new Object[end - start]; + + System.arraycopy((Object[]) v, start, a2, 0, end - start); + + return a2; + } else if (v instanceof HasFieldsList) { + return ((HasFieldsList) v).getSubList(start, end); + } else if (v instanceof JSONArray) { + JSONArray a = (JSONArray) v; + Object[] a2 = new Object[end - start]; + + for (int i = 0; i < a2.length; i++) { + try { + a2[i] = a.get(start + i); + } catch (JSONException e) { + // ignore + } + } + + return a2; + } else { + return ExpressionUtils.toObjectList(v).subList(start, end); + } + } else { + String s = (v instanceof String) ? 
(String) v : v.toString(); + + int start = ((Number) from).intValue(); + if (start < 0) { + start = s.length() + start; + } + start = Math.min(s.length(), Math.max(0, start)); + + if (to != null) { + int end = ((Number) to).intValue(); + if (end < 0) { + end = s.length() + end; + } + end = Math.min(s.length(), Math.max(start, end)); + + return s.substring(start, end); + } else { + return s.substring(start); + } + } + } + } + return null; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value( + "If o is an array, returns o[from, to]. " + + "if o is a string, returns o.substring(from, to)" + ); + writer.key("params"); writer.value("o, number from, optional number to"); + writer.key("returns"); writer.value("Depends on actual arguments"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/ToDate.java b/main/src/com/google/refine/expr/functions/ToDate.java new file mode 100644 index 000000000..ed4e71570 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/ToDate.java @@ -0,0 +1,77 @@ +package com.google.refine.expr.functions; + +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.util.CalendarParser; +import com.google.refine.expr.util.CalendarParserException; +import com.google.refine.gel.Function; + +public class ToDate implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 0) { + // missing value, can this happen? 
+ return null; + } + if (!(args[0] instanceof String)) { + // ignore cell values that aren't strings + return null; + } + String o1 = (String) args[0]; + + // "o, boolean month_first (optional)" + if (args.length == 1 || (args.length == 2 && args[1] instanceof Boolean)) { + boolean month_first = true; + if (args.length == 2) { + month_first = (Boolean) args[1]; + } + try { + return CalendarParser.parse( o1, (month_first) ? CalendarParser.MM_DD_YY : CalendarParser.DD_MM_YY); + } catch (CalendarParserException e) { + // do something about + } + } + + // "o, format1, format2 (optional), ..." + if (args.length>=2) { + for (int i=1;i= 1) { + Object o1 = args[0]; + if (o1 != null) { + if (o1 instanceof Calendar) { + if (args.length == 2) { + Object o2 = args[1]; + if (o2 != null && o2 instanceof String) { + SimpleDateFormat formatter = new SimpleDateFormat((String) o2); + return formatter.format(((Calendar) o1).getTime()); + } + } + } else { + return (o1 instanceof String) ? o1 : o1.toString(); + } + } + } + return null; + } + + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns o converted to a string"); + writer.key("params"); writer.value("o, string format (optional)"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/Type.java b/main/src/com/google/refine/expr/functions/Type.java new file mode 100644 index 000000000..8ea9a0c04 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/Type.java @@ -0,0 +1,50 @@ +package com.google.refine.expr.functions; + +import java.util.Calendar; +import java.util.Date; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Type 
implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1) { + Object v = args[0]; + + if (v != null) { + if (v instanceof String) { + return "string"; + } else if (v instanceof Calendar || v instanceof Date) { + return "date"; + } else if (v instanceof Number) { + return "number"; + } else if (v.getClass().isArray() || v instanceof List) { + return "array"; + } else if (v instanceof EvalError) { + return "error"; + } else { + return v.getClass().getName(); + } + } + return "undefined"; + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects one argument"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the type of o"); + writer.key("params"); writer.value("object o"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/arrays/ArgsToArray.java b/main/src/com/google/refine/expr/functions/arrays/ArgsToArray.java new file mode 100644 index 000000000..3075713a3 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/arrays/ArgsToArray.java @@ -0,0 +1,25 @@ +package com.google.refine.expr.functions.arrays; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class ArgsToArray implements Function { + + public Object call(Properties bindings, Object[] args) { + return args; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns all arguments passed to it as an array"); + writer.key("params"); writer.value("a1, a2, ..."); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/arrays/Join.java 
b/main/src/com/google/refine/expr/functions/arrays/Join.java new file mode 100644 index 000000000..503e12f8a --- /dev/null +++ b/main/src/com/google/refine/expr/functions/arrays/Join.java @@ -0,0 +1,78 @@ +package com.google.refine.expr.functions.arrays; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Join implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object v = args[0]; + Object s = args[1]; + + if (v != null && s != null && s instanceof String) { + String separator = (String) s; + + if (v.getClass().isArray() || v instanceof List || v instanceof JSONArray) { + StringBuffer sb = new StringBuffer(); + if (v.getClass().isArray()) { + for (Object o : (Object[]) v) { + if (o != null) { + if (sb.length() > 0) { + sb.append(separator); + } + sb.append(o.toString()); + } + } + } else if (v instanceof JSONArray) { + JSONArray a = (JSONArray) v; + int l = a.length(); + + for (int i = 0; i < l; i++) { + if (sb.length() > 0) { + sb.append(separator); + } + try { + sb.append(a.get(i).toString()); + } catch (JSONException e) { + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + + " cannot retrieve element " + i + " of array"); + } + } + } else { + for (Object o : ExpressionUtils.toObjectList(v)) { + if (o != null) { + if (sb.length() > 0) { + sb.append(separator); + } + sb.append(o.toString()); + } + } + } + + return sb.toString(); + } + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array and a string"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); 
writer.value("Returns the string obtained by joining the array a with the separator sep"); + writer.key("params"); writer.value("array a, string sep"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/arrays/Reverse.java b/main/src/com/google/refine/expr/functions/arrays/Reverse.java new file mode 100644 index 000000000..8b2e3704f --- /dev/null +++ b/main/src/com/google/refine/expr/functions/arrays/Reverse.java @@ -0,0 +1,65 @@ +package com.google.refine.expr.functions.arrays; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; +import com.google.refine.util.JSONUtilities; + +public class Reverse implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1) { + Object v = args[0]; + + if (v != null) { + if (v instanceof JSONArray) { + try { + v = JSONUtilities.toArray((JSONArray) v); + } catch (JSONException e) { + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + + " fails to process a JSON array: " + e.getMessage()); + } + } + + if (v.getClass().isArray() || v instanceof List) { + int length = v.getClass().isArray() ? 
+ ((Object[]) v).length : + ExpressionUtils.toObjectList(v).size(); + + Object[] r = new Object[length]; + if (v.getClass().isArray()) { + Object[] a = (Object[]) v; + for (int i = 0; i < length; i++) { + r[i] = a[r.length - i - 1]; + } + } else { + List a = ExpressionUtils.toObjectList(v); + for (int i = 0; i < length; i++) { + r[i] = a.get(r.length - i - 1); + } + } + return r; + } + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Reverses array a"); + writer.key("params"); writer.value("array a"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/arrays/Sort.java b/main/src/com/google/refine/expr/functions/arrays/Sort.java new file mode 100644 index 000000000..7ebd731ad --- /dev/null +++ b/main/src/com/google/refine/expr/functions/arrays/Sort.java @@ -0,0 +1,63 @@ +package com.google.refine.expr.functions.arrays; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; +import com.google.refine.util.JSONUtilities; + +public class Sort implements Function { + + @SuppressWarnings("unchecked") + public Object call(Properties bindings, Object[] args) { + if (args.length == 1) { + Object v = args[0]; + + if (v != null) { + if (v.getClass().isArray()) { + Object[] a = (Object[]) v; + Object[] r = a.clone(); + + Arrays.sort(r, 0, r.length); + + return r; + } else if (v instanceof JSONArray) { + try { + Object[] r = JSONUtilities.toArray((JSONArray) v); + + Arrays.sort(r, 0, r.length); + + return r; + } 
catch (JSONException e) { + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + + " fails to process a JSON array: " + e.getMessage()); + } + } else if (v instanceof List) { + List> a = (List>) v; + Collections.sort(a); + + return a; + } + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Sorts array a"); + writer.key("params"); writer.value("array a"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/arrays/Uniques.java b/main/src/com/google/refine/expr/functions/arrays/Uniques.java new file mode 100644 index 000000000..9b8f9169f --- /dev/null +++ b/main/src/com/google/refine/expr/functions/arrays/Uniques.java @@ -0,0 +1,63 @@ +package com.google.refine.expr.functions.arrays; + +import java.util.HashSet; +import java.util.List; +import java.util.Properties; +import java.util.Set; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; +import com.google.refine.util.JSONUtilities; + +public class Uniques implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1) { + Object v = args[0]; + + if (v != null) { + if (v instanceof JSONArray) { + try { + v = JSONUtilities.toArray((JSONArray) v); + } catch (JSONException e) { + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + + " fails to process a JSON array: " + e.getMessage()); + } + } + + if (v.getClass().isArray() || v instanceof List) { + Set set = null; + + if (v.getClass().isArray()) { + Object[] a = (Object[]) v; + + set = new 
HashSet(a.length); + for (int i = 0; i < a.length; i++) { + set.add(a[i]); + } + } else { + set = new HashSet(ExpressionUtils.toObjectList(v)); + } + return set.toArray(); + } + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns array a with duplicates removed"); + writer.key("params"); writer.value("array a"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/booleans/And.java b/main/src/com/google/refine/expr/functions/booleans/And.java new file mode 100644 index 000000000..cd3b592f4 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/booleans/And.java @@ -0,0 +1,30 @@ +package com.google.refine.expr.functions.booleans; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class And implements Function { + + public Object call(Properties bindings, Object[] args) { + for (Object o : args) { + if (!Not.objectToBoolean(o)) { + return false; + } + } + return true; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("ANDs two boolean values"); + writer.key("params"); writer.value("boolean a, boolean b"); + writer.key("returns"); writer.value("boolean"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/booleans/Not.java b/main/src/com/google/refine/expr/functions/booleans/Not.java new file mode 100644 index 000000000..017484e15 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/booleans/Not.java @@ -0,0 +1,35 @@ +package com.google.refine.expr.functions.booleans; + +import java.util.Properties; + +import org.json.JSONException; +import 
org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Not implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1) { + return !objectToBoolean(args[0]); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a boolean"); + } + + public static boolean objectToBoolean(Object o) { + return o == null ? false : ( + (o instanceof Boolean) ? ((Boolean) o).booleanValue() : Boolean.parseBoolean(o.toString())); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the opposite of b"); + writer.key("params"); writer.value("boolean b"); + writer.key("returns"); writer.value("boolean"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/booleans/Or.java b/main/src/com/google/refine/expr/functions/booleans/Or.java new file mode 100644 index 000000000..1484dee7c --- /dev/null +++ b/main/src/com/google/refine/expr/functions/booleans/Or.java @@ -0,0 +1,30 @@ +package com.google.refine.expr.functions.booleans; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class Or implements Function { + + public Object call(Properties bindings, Object[] args) { + for (Object o : args) { + if (Not.objectToBoolean(o)) { + return true; + } + } + return false; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns a OR b"); + writer.key("params"); writer.value("boolean a, boolean b"); + writer.key("returns"); writer.value("boolean"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/date/DatePart.java 
b/main/src/com/google/refine/expr/functions/date/DatePart.java new file mode 100644 index 000000000..31fac2c40 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/date/DatePart.java @@ -0,0 +1,68 @@ +package com.google.refine.expr.functions.date; + +import java.util.Calendar; +import java.util.Date; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class DatePart implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2 && + args[0] != null && (args[0] instanceof Calendar || args[0] instanceof Date) && + args[1] != null && args[1] instanceof String) { + + String part = (String) args[1]; + if (args[0] instanceof Calendar) { + return getPart((Calendar) args[0], part); + } else { + Calendar c = Calendar.getInstance(); + c.setTime((Date) args[0]); + return getPart(c, part); + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a date, a number and a string"); + } + + static private String[] s_daysOfWeek = new String[] { + "Saturday", "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday" + }; + + private Object getPart(Calendar c, String part) { + if ("hours".equals(part) || "hour".equals(part) || "h".equals(part)) { + return c.get(Calendar.HOUR_OF_DAY); + } else if ("minutes".equals(part) || "minute".equals(part) || "min".equals(part)) { // avoid 'm' to avoid confusion with month + return c.get(Calendar.MINUTE); + } else if ("seconds".equals(part) || "sec".equals(part) || "s".equals(part)) { + return c.get(Calendar.SECOND); + } else if ("years".equals(part) || "year".equals(part)) { + return c.get(Calendar.YEAR); + } else if ("months".equals(part) || "month".equals(part)) { // avoid 'm' to avoid confusion with minute + return c.get(Calendar.MONTH) + 1; // ISSUE 115 - 
people expect January to be 1 not 0 + } else if ("weeks".equals(part) || "week".equals(part) || "w".equals(part)) { + return c.get(Calendar.WEEK_OF_MONTH); + } else if ("days".equals(part) || "day".equals(part) || "d".equals(part)) { + return c.get(Calendar.DAY_OF_MONTH); + } else if ("weekday".equals(part)) { + return s_daysOfWeek[c.get(Calendar.DAY_OF_WEEK)]; + } else if ("time".equals(part)) { + return c.getTimeInMillis(); + } else { + return new EvalError("Date unit '" + part + "' not recognized."); + } + } + + public void write(JSONWriter writer, Properties options) throws JSONException { + writer.object(); + writer.key("description"); writer.value("Returns part of a date"); + writer.key("params"); writer.value("date d, string part"); + writer.key("returns"); writer.value("date"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/date/Inc.java b/main/src/com/google/refine/expr/functions/date/Inc.java new file mode 100644 index 000000000..0af2f59b8 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/date/Inc.java @@ -0,0 +1,58 @@ +package com.google.refine.expr.functions.date; + +import java.util.Calendar; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Inc implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 3 && + args[0] != null && args[0] instanceof Calendar && + args[1] != null && args[1] instanceof Number && + args[2] != null && args[2] instanceof String) { + Calendar date = (Calendar) args[0]; + int amount = ((Number) args[1]).intValue(); + String unit = (String) args[2]; + + date.add(getField(unit), amount); + + return date; + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a date, a number and a string"); + } + + private int 
getField(String unit) { + if ("hours".equals(unit) || "hour".equals(unit) || "h".equals(unit)) { + return Calendar.HOUR; + } else if ("days".equals(unit) || "day".equals(unit) || "d".equals(unit)) { + return Calendar.DAY_OF_MONTH; + } else if ("years".equals(unit) || "year".equals(unit)) { + return Calendar.YEAR; + } else if ("months".equals(unit) || "month".equals(unit)) { // avoid 'm' to avoid confusion with minute + return Calendar.MONTH; + } else if ("minutes".equals(unit) || "minute".equals(unit) || "min".equals(unit)) { // avoid 'm' to avoid confusion with month + return Calendar.MINUTE; + } else if ("weeks".equals(unit) || "week".equals(unit) || "w".equals(unit)) { + return Calendar.WEEK_OF_MONTH; + } else if ("seconds".equals(unit) || "sec".equals(unit) || "s".equals(unit)) { + return Calendar.SECOND; + } else { + throw new RuntimeException("Unit '" + unit + "' not recognized."); + } + } + + public void write(JSONWriter writer, Properties options) throws JSONException { + writer.object(); + writer.key("description"); writer.value("Returns a date changed by the given amount in the given unit of time"); + writer.key("params"); writer.value("date d, number value, string unit (default to 'hour')"); + writer.key("returns"); writer.value("date"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/date/Now.java b/main/src/com/google/refine/expr/functions/date/Now.java new file mode 100644 index 000000000..918030373 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/date/Now.java @@ -0,0 +1,28 @@ +package com.google.refine.expr.functions.date; + +import java.util.Calendar; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class Now implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 0) { + return Calendar.getInstance(); + } + return null; + } + + public void write(JSONWriter 
writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the current time"); + writer.key("returns"); writer.value("date"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/math/Ceil.java b/main/src/com/google/refine/expr/functions/math/Ceil.java new file mode 100644 index 000000000..61382c2fb --- /dev/null +++ b/main/src/com/google/refine/expr/functions/math/Ceil.java @@ -0,0 +1,30 @@ +package com.google.refine.expr.functions.math; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Ceil implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null && args[0] instanceof Number) { + return (long) Math.ceil(((Number) args[0]).doubleValue()); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a number"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the ceiling of a number"); + writer.key("params"); writer.value("number d"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/math/Exp.java b/main/src/com/google/refine/expr/functions/math/Exp.java new file mode 100644 index 000000000..a5d3c2213 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/math/Exp.java @@ -0,0 +1,28 @@ +package com.google.refine.expr.functions.math; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class Exp implements Function { + + public Object call(Properties bindings, Object[] args) { + if 
(args.length == 1 && args[0] instanceof Number) { + return Math.exp(((Number) args[0]).doubleValue()); + } + return null; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns e^n"); + writer.key("params"); writer.value("number n"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/math/Floor.java b/main/src/com/google/refine/expr/functions/math/Floor.java new file mode 100644 index 000000000..692fff2b0 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/math/Floor.java @@ -0,0 +1,31 @@ +package com.google.refine.expr.functions.math; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Floor implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null && args[0] instanceof Number) { + return (long) Math.floor(((Number) args[0]).doubleValue()); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a number"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the floor of a number"); + writer.key("params"); writer.value("number d"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } + +} diff --git a/main/src/com/google/refine/expr/functions/math/Ln.java b/main/src/com/google/refine/expr/functions/math/Ln.java new file mode 100644 index 000000000..66f34e6e6 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/math/Ln.java @@ -0,0 +1,30 @@ +package com.google.refine.expr.functions.math; + +import java.util.Properties; + +import 
org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Ln implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null && args[0] instanceof Number) { + return Math.log(((Number) args[0]).doubleValue()); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a number"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the natural log of n"); + writer.key("params"); writer.value("number n"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/math/Log.java b/main/src/com/google/refine/expr/functions/math/Log.java new file mode 100644 index 000000000..1a4755aef --- /dev/null +++ b/main/src/com/google/refine/expr/functions/math/Log.java @@ -0,0 +1,30 @@ +package com.google.refine.expr.functions.math; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Log implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null && args[0] instanceof Number) { + return Math.log10(((Number) args[0]).doubleValue()); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a number"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the base 10 log of n"); + writer.key("params"); writer.value("number n"); + writer.key("returns"); writer.value("number"); + 
writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/math/Max.java b/main/src/com/google/refine/expr/functions/math/Max.java new file mode 100644 index 000000000..8e6027688 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/math/Max.java @@ -0,0 +1,34 @@ +package com.google.refine.expr.functions.math; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Max implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2 && + args[0] != null && args[0] instanceof Number && + args[1] != null && args[1] instanceof Number) { + return Math.max( + ((Number) args[0]).doubleValue(), + ((Number) args[1]).doubleValue()); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 numbers"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the greater of two numbers"); + writer.key("params"); writer.value("number a, number b"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/math/Min.java b/main/src/com/google/refine/expr/functions/math/Min.java new file mode 100644 index 000000000..0554c8e03 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/math/Min.java @@ -0,0 +1,34 @@ +package com.google.refine.expr.functions.math; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Min implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 
2 && + args[0] != null && args[0] instanceof Number && + args[1] != null && args[1] instanceof Number) { + return Math.min( + ((Number) args[0]).doubleValue(), + ((Number) args[1]).doubleValue()); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 numbers"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the smaller of two numbers"); + writer.key("params"); writer.value("number a, number b"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/math/Mod.java b/main/src/com/google/refine/expr/functions/math/Mod.java new file mode 100644 index 000000000..b3581e0c3 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/math/Mod.java @@ -0,0 +1,35 @@ +package com.google.refine.expr.functions.math; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Mod implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2 && + args[0] != null && args[0] instanceof Number && + args[1] != null && args[1] instanceof Number) { + int a = ((Number) args[0]).intValue(); + int b = ((Number) args[1]).intValue(); + + return a % b; + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 numbers"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns a modulus b"); + writer.key("params"); writer.value("number a, number b"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/math/Pow.java 
b/main/src/com/google/refine/expr/functions/math/Pow.java new file mode 100644 index 000000000..6aa40ce71 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/math/Pow.java @@ -0,0 +1,31 @@ +package com.google.refine.expr.functions.math; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class Pow implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2 && args[0] instanceof Number && args[1] instanceof Number) { + return Math.pow( + ((Number) args[0]).doubleValue(), + ((Number) args[1]).doubleValue() + ); + } + return null; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns a^b"); + writer.key("params"); writer.value("number a, number b"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/math/Round.java b/main/src/com/google/refine/expr/functions/math/Round.java new file mode 100644 index 000000000..3cd4cb875 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/math/Round.java @@ -0,0 +1,30 @@ +package com.google.refine.expr.functions.math; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Round implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null && args[0] instanceof Number) { + return ((Number) args[0]).longValue(); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a number"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + 
writer.key("description"); writer.value("Returns n rounded"); + writer.key("params"); writer.value("number n"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/math/Sum.java b/main/src/com/google/refine/expr/functions/math/Sum.java new file mode 100644 index 000000000..4aef524c1 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/math/Sum.java @@ -0,0 +1,59 @@ +package com.google.refine.expr.functions.math; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Sum implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1) { + Object v = args[0]; + + if (v != null && (v.getClass().isArray() || v instanceof List)) { + int length = v.getClass().isArray() ? 
+ ((Object[]) v).length : + ExpressionUtils.toObjectList(v).size(); + + double total = 0; + + if (v.getClass().isArray()) { + Object[] a = (Object[]) v; + for (int i = 0; i < length; i++) { + Object n = a[length - i - 1]; + if (n instanceof Number) { + total += ((Number) n).doubleValue(); + } + } + } else { + List a = ExpressionUtils.toObjectList(v); + for (int i = 0; i < length; i++) { + Object n = a.get(length - i - 1); + if (n instanceof Number) { + total += ((Number) n).doubleValue(); + } + } + } + return total; + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects an array of numbers"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Sums numbers in array a"); + writer.key("params"); writer.value("array a"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Chomp.java b/main/src/com/google/refine/expr/functions/strings/Chomp.java new file mode 100644 index 000000000..4960f97b9 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Chomp.java @@ -0,0 +1,33 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class Chomp implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object o1 = args[0]; + Object o2 = args[1]; + if (o1 != null && o2 != null && o1 instanceof String && o2 instanceof String) { + return StringUtils.chomp((String) o1, (String) o2); + } + } + return null; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Removes separator from the end of str if it's 
there, otherwise leave it alone."); + writer.key("params"); writer.value("string str, string separator"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Contains.java b/main/src/com/google/refine/expr/functions/strings/Contains.java new file mode 100644 index 000000000..fdf8f704c --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Contains.java @@ -0,0 +1,31 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class Contains implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object s1 = args[0]; + Object s2 = args[1]; + if (s1 != null && s2 != null && s1 instanceof String && s2 instanceof String) { + return ((String) s1).indexOf((String) s2) > -1; + } + } + return null; + } + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns whether s contains frag"); + writer.key("params"); writer.value("string s, string frag"); + writer.key("returns"); writer.value("boolean"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Diff.java b/main/src/com/google/refine/expr/functions/strings/Diff.java new file mode 100644 index 000000000..398a518a4 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Diff.java @@ -0,0 +1,59 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Calendar; +import java.util.Date; +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.util.CalendarParser; +import com.google.refine.expr.util.CalendarParserException; +import 
com.google.refine.gel.Function; + +public class Diff implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length >= 2 && args.length <= 3) { + Object o1 = args[0]; + Object o2 = args[1]; + if (o1 != null && o2 != null) { + if (o1 instanceof String && o2 instanceof String) { + return StringUtils.difference((String) o1,(String) o2); + } else if ((o1 instanceof Date || o1 instanceof Calendar) && args.length == 3) { + Object o3 = args[2]; + if (o3 != null && o3 instanceof String) { + try { + String unit = ((String) o3).toLowerCase(); + Date c1 = (o1 instanceof Date) ? (Date) o1 : ((Calendar) o1).getTime(); + Date c2 = (o2 instanceof Date) ? (Date) o2 : CalendarParser.parse((o2 instanceof String) ? (String) o2 : o2.toString()).getTime(); + long delta = (c1.getTime() - c2.getTime()) / 1000; + if ("seconds".equals(unit)) return delta; + delta /= 60; + if ("minutes".equals(unit)) return delta; + delta /= 60; + if ("hours".equals(unit)) return delta; + long days = delta / 24; + if ("days".equals(unit)) return days; + if ("weeks".equals(unit)) return days / 7; + if ("months".equals(unit)) return days / 30; + if ("years".equals(unit)) return days / 365; + } catch (CalendarParserException e) { + // we should throw at this point because it's important to know that date parsing failed + } + } + } + } + } + return null; + } + + public void write(JSONWriter writer, Properties options) throws JSONException { + writer.object(); + writer.key("description"); writer.value("For strings, returns the portion where they differ. 
For dates, it returns the difference in given time units"); + writer.key("params"); writer.value("o1, o2, time unit (optional)"); + writer.key("returns"); writer.value("string for strings, number for dates"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/EndsWith.java b/main/src/com/google/refine/expr/functions/strings/EndsWith.java new file mode 100644 index 000000000..d05b3d045 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/EndsWith.java @@ -0,0 +1,34 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class EndsWith implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object s1 = args[0]; + Object s2 = args[1]; + if (s1 != null && s2 != null && s1 instanceof String && s2 instanceof String) { + return ((String) s1).endsWith((String) s2); + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns whether s ends with sub"); + writer.key("params"); writer.value("string s, string sub"); + writer.key("returns"); writer.value("boolean"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Escape.java b/main/src/com/google/refine/expr/functions/strings/Escape.java new file mode 100644 index 000000000..f428fa8cd --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Escape.java @@ -0,0 +1,53 @@ +package com.google.refine.expr.functions.strings; + +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.util.Properties; 
+ +import org.apache.commons.lang.StringEscapeUtils; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Escape implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object o1 = args[0]; + Object o2 = args[1]; + if (o1 != null && o2 != null && o1 instanceof String && o2 instanceof String) { + String s = (String) o1; + String mode = ((String) o2).toLowerCase(); + if ("html".equals(mode)) { + return StringEscapeUtils.escapeHtml(s); + } else if ("xml".equals(mode)) { + return StringEscapeUtils.escapeXml(s); + } else if ("csv".equals(mode)) { + return StringEscapeUtils.escapeCsv(s); + } else if ("javascript".equals(mode)) { + return StringEscapeUtils.escapeJavaScript(s); + } else if ("url".equals(mode)) { + try { + return URLEncoder.encode(s,"UTF-8"); + } catch (UnsupportedEncodingException e) {} + } else { + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " does not recognize mode '" + mode + "'."); + } + } + } + return null; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Escapes a string depending on the given escaping mode."); + writer.key("params"); writer.value("string s, string mode ['html','xml','csv','url','javascript']"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Fingerprint.java b/main/src/com/google/refine/expr/functions/strings/Fingerprint.java new file mode 100644 index 000000000..5d7356561 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Fingerprint.java @@ -0,0 +1,34 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import 
org.json.JSONWriter; + +import com.google.refine.clustering.binning.FingerprintKeyer; +import com.google.refine.clustering.binning.Keyer; +import com.google.refine.gel.Function; + +public class Fingerprint implements Function { + + static Keyer fingerprint = new FingerprintKeyer(); + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null) { + Object o = args[0]; + String s = (o instanceof String) ? (String) o : o.toString(); + return fingerprint.key(s); + } + return null; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the fingerprint of s, a derived string that aims to be a more canonical form of it (this is mostly useful for finding clusters of strings related to the same information)."); + writer.key("params"); writer.value("string s"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/IndexOf.java b/main/src/com/google/refine/expr/functions/strings/IndexOf.java new file mode 100644 index 000000000..8d923554c --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/IndexOf.java @@ -0,0 +1,34 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class IndexOf implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object s1 = args[0]; + Object s2 = args[1]; + if (s1 != null && s2 != null && s1 instanceof String && s2 instanceof String) { + return ((String) s1).indexOf((String) s2); + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings"); + } + + public void 
write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the index of sub first ocurring in s"); + writer.key("params"); writer.value("string s, string sub"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/LastIndexOf.java b/main/src/com/google/refine/expr/functions/strings/LastIndexOf.java new file mode 100644 index 000000000..18a05b8a0 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/LastIndexOf.java @@ -0,0 +1,35 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class LastIndexOf implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object s1 = args[0]; + Object s2 = args[1]; + if (s1 != null && s2 != null && s1 instanceof String && s2 instanceof String) { + return ((String) s1).lastIndexOf((String) s2); + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the index of sub last ocurring in s"); + writer.key("params"); writer.value("string s, string sub"); + writer.key("returns"); writer.value("number"); + writer.endObject(); + } + +} diff --git a/main/src/com/google/refine/expr/functions/strings/MD5.java b/main/src/com/google/refine/expr/functions/strings/MD5.java new file mode 100644 index 000000000..c1aa9de20 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/MD5.java @@ -0,0 +1,33 @@ +package com.google.refine.expr.functions.strings; + 
+import java.util.Properties; + +import org.apache.commons.codec.digest.DigestUtils; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class MD5 implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null) { + Object o = args[0]; + String s = (o instanceof String) ? (String) o : o.toString(); + return DigestUtils.md5Hex(s); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the MD5 hash of s"); + writer.key("params"); writer.value("string s"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Match.java b/main/src/com/google/refine/expr/functions/strings/Match.java new file mode 100644 index 000000000..02c748d52 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Match.java @@ -0,0 +1,55 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Match implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object s = args[0]; + Object p = args[1]; + + if (s != null && p != null && (p instanceof String || p instanceof Pattern)) { + + Pattern pattern = (p instanceof String) ? 
Pattern.compile((String) p) : (Pattern) p; + + Matcher matcher = pattern.matcher(s.toString()); + + if (matcher.matches()) { + int count = matcher.groupCount(); + + String[] groups = new String[count]; + for (int i = 0; i < count; i++) { + groups[i] = matcher.group(i + 1); + } + + return groups; + } else { + return null; + } + } + + return null; + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string or a regexp"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns an array of the groups matching the given regular expression"); + writer.key("params"); writer.value("string or regexp"); + writer.key("returns"); writer.value("array of strings"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/NGram.java b/main/src/com/google/refine/expr/functions/strings/NGram.java new file mode 100644 index 000000000..110a1759e --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/NGram.java @@ -0,0 +1,56 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class NGram implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object s = args[0]; + Object n = args[1]; + + if (s != null && s instanceof String && n != null && n instanceof Number) { + + String[] tokens = StringUtils.split((String) s); + + int count = ((Number) n).intValue(); + if (count >= tokens.length) { + return new String[] { (String) s }; + } + + int len = tokens.length - count + 1; + String[] ngrams = new String[len]; + for (int i = 0; i < len; i++) { + String[] ss = new 
String[count]; + for (int j = 0; j < count; j++) { + ss[j] = tokens[i + j]; + } + ngrams[i] = StringUtils.join(ss,' '); + } + + return ngrams; + } + + return null; + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string and a number"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns an array of the word ngrams of s"); + writer.key("params"); writer.value("string s, number n"); + writer.key("returns"); writer.value("array of strings"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/NGramFingerprint.java b/main/src/com/google/refine/expr/functions/strings/NGramFingerprint.java new file mode 100644 index 000000000..b49df1dae --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/NGramFingerprint.java @@ -0,0 +1,53 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; +import java.util.TreeSet; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.clustering.binning.Keyer; +import com.google.refine.clustering.binning.NGramFingerprintKeyer; +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class NGramFingerprint implements Function { + + static Keyer ngram_fingerprint = new NGramFingerprintKeyer(); + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 || args.length == 2) { + if (args[0] != null) { + int ngram_size = 1; + if (args.length == 2 && args[1] != null) { + ngram_size = (args[1] instanceof Number) ? ((Number) args[1]).intValue() : Integer.parseInt(args[1].toString()); + } + Object o = args[0]; + String s = (o instanceof String) ? 
(String) o : o.toString(); + return ngram_fingerprint.key(s,ngram_size); + } + return null; + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects at least a string"); + } + + protected TreeSet ngram_split(String s, int size) { + TreeSet set = new TreeSet(); + char[] chars = s.toCharArray(); + for (int i = 0; i + size <= chars.length; i++) { + set.add(new String(chars,i,size)); + } + return set; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the n-gram fingerprint of s"); + writer.key("params"); writer.value("string s, number n"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/ParseJson.java b/main/src/com/google/refine/expr/functions/strings/ParseJson.java new file mode 100644 index 000000000..d619eef20 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/ParseJson.java @@ -0,0 +1,39 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONTokener; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class ParseJson implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length >= 1) { + Object o1 = args[0]; + if (o1 != null) { + try { + return new JSONTokener(o1.toString()).nextValue(); + } catch (JSONException e) { + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " failed: " + e.getMessage()); + } + } + } + return null; + } + + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Parses a string as JSON"); + writer.key("params"); writer.value("string 
s"); + writer.key("returns"); writer.value("JSON object"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Partition.java b/main/src/com/google/refine/expr/functions/strings/Partition.java new file mode 100644 index 000000000..02fecf807 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Partition.java @@ -0,0 +1,79 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class Partition implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length >= 2 && args.length <= 3) { + Object o1 = args[0]; + Object o2 = args[1]; + + boolean omitFragment = false; + if (args.length == 3) { + Object o3 = args[2]; + if (o3 instanceof Boolean) { + omitFragment = ((Boolean) o3).booleanValue(); + } + } + + if (o1 != null && o2 != null && o1 instanceof String) { + String s = (String) o1; + + int from = -1; + int to = -1; + + if (o2 instanceof String) { + String frag = (String) o2; + + from = s.indexOf(frag); + to = from + frag.length(); + } else if (o2 instanceof Pattern) { + Pattern pattern = (Pattern) o2; + Matcher matcher = pattern.matcher(s); + if (matcher.find()) { + from = matcher.start(); + to = matcher.end(); + } + } + + String[] output = omitFragment ? 
new String[2] : new String[3]; + if (from > -1) { + output[0] = s.substring(0, from); + if (omitFragment) { + output[1] = s.substring(to); + } else { + output[1] = s.substring(from, to); + output[2] = s.substring(to); + } + } else { + output[0] = s; + output[1] = ""; + if (!omitFragment) { + output[2] = ""; + } + } + return output; + } + } + return null; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value( + "Returns an array of strings [a,frag,b] where a is the string part before the first occurrence of frag in s and b is what's left. If omitFragment is true, frag is not returned."); + writer.key("params"); writer.value("string s, string or regex frag, optional boolean omitFragment"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Phonetic.java b/main/src/com/google/refine/expr/functions/strings/Phonetic.java new file mode 100644 index 000000000..4bd5e441d --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Phonetic.java @@ -0,0 +1,51 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.clustering.binning.DoubleMetaphoneKeyer; +import com.google.refine.clustering.binning.MetaphoneKeyer; +import com.google.refine.clustering.binning.SoundexKeyer; +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Phonetic implements Function { + + static private DoubleMetaphoneKeyer metaphone2 = new DoubleMetaphoneKeyer(); + static private MetaphoneKeyer metaphone = new MetaphoneKeyer(); + static private SoundexKeyer soundex = new SoundexKeyer(); + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object o1 = 
args[0]; + Object o2 = args[1]; + if (o1 != null && o2 != null && o2 instanceof String) { + String str = (o1 instanceof String) ? (String) o1 : o1.toString(); + String encoding = ((String) o2).toLowerCase(); + if ("doublemetaphone".equals(encoding)) { + return metaphone2.key(str); + } else if ("metaphone".equals(encoding)) { + return metaphone.key(str); + } else if ("soundex".equals(encoding)) { + return soundex.key(str); + } else { + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " doesn't know how to handle the '" + encoding + "' encoding."); + } + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 3 strings"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the a phonetic encoding of s (optionally indicating which encoding to use')"); + writer.key("params"); writer.value("string s, string encoding (optional, defaults to 'DoubleMetaphone')"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/RPartition.java b/main/src/com/google/refine/expr/functions/strings/RPartition.java new file mode 100644 index 000000000..a153b7e33 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/RPartition.java @@ -0,0 +1,80 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class RPartition implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length >= 2 && args.length <= 3) { + Object o1 = args[0]; + Object o2 = args[1]; + + boolean omitFragment = false; + if (args.length == 3) { + Object o3 = args[2]; + if (o3 instanceof Boolean) { + omitFragment = 
((Boolean) o3).booleanValue(); + } + } + + if (o1 != null && o2 != null && o1 instanceof String) { + String s = (String) o1; + + int from = -1; + int to = -1; + + if (o2 instanceof String) { + String frag = (String) o2; + + from = s.lastIndexOf(frag); + to = from + frag.length(); + } else if (o2 instanceof Pattern) { + Pattern pattern = (Pattern) o2; + Matcher matcher = pattern.matcher(s); + + while (matcher.find()) { + from = matcher.start(); + to = matcher.end(); + } + } + + String[] output = omitFragment ? new String[2] : new String[3]; + if (from > -1) { + output[0] = s.substring(0, from); + if (omitFragment) { + output[1] = s.substring(to); + } else { + output[1] = s.substring(from, to); + output[2] = s.substring(to); + } + } else { + output[0] = s; + output[1] = ""; + if (!omitFragment) { + output[2] = ""; + } + } + return output; + } + } + return null; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value( + "Returns an array of strings [a,frag,b] where a is the string part before the last occurrence of frag in s and b is what's left. 
If omitFragment is true, frag is not returned."); + writer.key("params"); writer.value("string s, string or regex frag, optional boolean omitFragment"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Reinterpret.java b/main/src/com/google/refine/expr/functions/strings/Reinterpret.java new file mode 100644 index 000000000..d16318cb4 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Reinterpret.java @@ -0,0 +1,51 @@ +package com.google.refine.expr.functions.strings; + +import java.io.UnsupportedEncodingException; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; +import com.google.refine.model.Project; + +public class Reinterpret implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object o1 = args[0]; + Object o2 = args[1]; + if (o1 != null && o2 != null && o2 instanceof String) { + String str = (o1 instanceof String) ? 
(String) o1 : o1.toString(); + Project project = (Project) bindings.get("project"); + ProjectMetadata metadata = ProjectManager.singleton.getProjectMetadata(project.id); + String decoder = (String) metadata.getEncoding(); + String encoder = (String) o2; + String reinterpreted = null; + + try { + reinterpreted = new String(str.getBytes(decoder), encoder); + } catch (UnsupportedEncodingException e) { + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + ": encoding '" + encoder + "' is not available or recognized."); + } + + return reinterpreted; + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 arguments"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns s reinterpreted thru the given encoder."); + writer.key("params"); writer.value("string s, string encoder"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Replace.java b/main/src/com/google/refine/expr/functions/strings/Replace.java new file mode 100644 index 000000000..7ab986b36 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Replace.java @@ -0,0 +1,44 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; +import java.util.regex.Pattern; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Replace implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 3) { + Object o1 = args[0]; + Object o2 = args[1]; + Object o3 = args[2]; + if (o1 != null && o2 != null && o3 != null && o3 instanceof String) { + String str = (o1 instanceof String) ? 
(String) o1 : o1.toString(); + + if (o2 instanceof String) { + return str.replace((String) o2, (String) o3); + } else if (o2 instanceof Pattern) { + Pattern pattern = (Pattern) o2; + return pattern.matcher(str).replaceAll((String) o3); + } + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 3 strings, or 1 string, 1 regex, and 1 string"); + } + + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the string obtained by replacing f with r in s"); + writer.key("params"); writer.value("string s, string or regex f, string r"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/ReplaceChars.java b/main/src/com/google/refine/expr/functions/strings/ReplaceChars.java new file mode 100644 index 000000000..903cfe661 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/ReplaceChars.java @@ -0,0 +1,38 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class ReplaceChars implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 3) { + Object o1 = args[0]; + Object o2 = args[1]; + Object o3 = args[2]; + if (o1 != null && o2 != null && o3 != null && o2 instanceof String && o3 instanceof String) { + String str = (o1 instanceof String) ? 
(String) o1 : o1.toString(); + return StringUtils.replaceChars(str, (String) o2, (String) o3); /* o2 = chars to search for, o3 = replacement chars at the same index */ + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 3 strings"); + } + + + /** Writes this function's description, parameter list and return type as one JSON object. */ public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the string obtained by replacing each char of s that occurs in f with the char in r at that same position"); + writer.key("params"); writer.value("string s, string f, string r"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/SHA1.java b/main/src/com/google/refine/expr/functions/strings/SHA1.java new file mode 100644 index 000000000..fa59f9fd5 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/SHA1.java @@ -0,0 +1,33 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.apache.commons.codec.digest.DigestUtils; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class SHA1 implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null) { + Object o = args[0]; + String s = (o instanceof String) ? 
(String) o : o.toString(); + return DigestUtils.shaHex(s); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the SHA-1 hash of s"); + writer.key("params"); writer.value("string s"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/SmartSplit.java b/main/src/com/google/refine/expr/functions/strings/SmartSplit.java new file mode 100644 index 000000000..b1cd7327d --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/SmartSplit.java @@ -0,0 +1,78 @@ +package com.google.refine.expr.functions.strings; + +import java.io.IOException; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import au.com.bytecode.opencsv.CSVParser; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class SmartSplit implements Function { + static protected CSVParser s_tabParser = new CSVParser( + '\t', + CSVParser.DEFAULT_QUOTE_CHARACTER, + CSVParser.DEFAULT_ESCAPE_CHARACTER, + CSVParser.DEFAULT_STRICT_QUOTES, + CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, + false + ); + static protected CSVParser s_commaParser = new CSVParser( + ',', + CSVParser.DEFAULT_QUOTE_CHARACTER, + CSVParser.DEFAULT_ESCAPE_CHARACTER, + CSVParser.DEFAULT_STRICT_QUOTES, + CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, + false + ); + public Object call(Properties bindings, Object[] args) { + if (args.length >= 1 && args.length <= 2) { + CSVParser parser = null; + + Object v = args[0]; + String s = v.toString(); + + if (args.length > 1) { + String sep = args[1].toString(); + parser = new CSVParser( + sep.charAt(0), + CSVParser.DEFAULT_QUOTE_CHARACTER, + 
CSVParser.DEFAULT_ESCAPE_CHARACTER, + CSVParser.DEFAULT_STRICT_QUOTES, + CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, + false + ); + } + + if (parser == null) { + int tab = s.indexOf('\t'); + if (tab >= 0) { + parser = s_tabParser; + } else { + parser = s_commaParser; + } + } + + try { + return parser.parseLine(s); + } catch (IOException e) { + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " error: " + e.getMessage()); + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 1 or 2 strings"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the array of strings obtained by splitting s with separator sep. Handles quotes properly. Guesses tab or comma separator if \"sep\" is not given."); + writer.key("params"); writer.value("string s, optional string sep"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Split.java b/main/src/com/google/refine/expr/functions/strings/Split.java new file mode 100644 index 000000000..7fb89b56f --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Split.java @@ -0,0 +1,53 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; +import java.util.regex.Pattern; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Split implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length >= 2 && args.length <= 3) { + boolean preserveAllTokens = false; + + Object v = args[0]; + Object split = args[1]; + if (args.length == 3) { + Object preserve = args[2]; + if (preserve instanceof Boolean) { + 
preserveAllTokens = ((Boolean) preserve); + } + } + + if (v != null && split != null) { + String str = (v instanceof String ? (String) v : v.toString()); + if (split instanceof String) { + return preserveAllTokens ? + StringUtils.splitByWholeSeparatorPreserveAllTokens(str, (String) split) : + StringUtils.splitByWholeSeparator(str, (String) split); + } else if (split instanceof Pattern) { + Pattern pattern = (Pattern) split; + return pattern.split(str); + } + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings, or 1 string and 1 regex, followed by an optional boolean"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the array of strings obtained by splitting s with separator sep. If preserveAllTokens is true, then empty segments are preserved."); + writer.key("params"); writer.value("string s, string or regex sep, optional boolean preserveAllTokens"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/SplitByCharType.java b/main/src/com/google/refine/expr/functions/strings/SplitByCharType.java new file mode 100644 index 000000000..3bd2f4a46 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/SplitByCharType.java @@ -0,0 +1,35 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class SplitByCharType implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1) { + Object o = args[0]; + if (o != null) { + String s = (o instanceof String) ? 
(String) o : o.toString(); + return StringUtils.splitByCharacterType(s); + } + } + /* reached when not given exactly one non-null argument */ return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string"); + } + + /** Writes this function's description, parameter list and return type as one JSON object. */ public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns an array of strings obtained by splitting s grouping consecutive chars by their unicode type"); + writer.key("params"); writer.value("string s"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/SplitByLengths.java b/main/src/com/google/refine/expr/functions/strings/SplitByLengths.java new file mode 100644 index 000000000..61eef181d --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/SplitByLengths.java @@ -0,0 +1,48 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class SplitByLengths implements Function { + public Object call(Properties bindings, Object[] args) { + if (args.length >= 2 && args[0] != null) { + Object o = args[0]; + String s = o instanceof String ? 
(String) o : o.toString(); + + String[] results = new String[args.length - 1]; + + int lastIndex = 0; + + for (int i = 1; i < args.length; i++) { + int thisIndex = lastIndex; + + Object o2 = args[i]; + if (o2 instanceof Number) { + thisIndex = Math.min(s.length(), lastIndex + Math.max(0, ((Number) o2).intValue())); + } + + results[i - 1] = s.substring(lastIndex, thisIndex); + lastIndex = thisIndex; + } + + return results; + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 1 string and 1 or more numbers"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns the array of strings obtained by splitting s into substrings with the given lengths"); + writer.key("params"); writer.value("string s, number n, ..."); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/StartsWith.java b/main/src/com/google/refine/expr/functions/strings/StartsWith.java new file mode 100644 index 000000000..b7812a3de --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/StartsWith.java @@ -0,0 +1,33 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class StartsWith implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object s1 = args[0]; + Object s2 = args[1]; + if (s1 != null && s2 != null && s1 instanceof String && s2 instanceof String) { + return ((String) s1).startsWith((String) s2); + } + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects 2 strings"); + } + public void write(JSONWriter writer, Properties options) + 
throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns whether s starts with sub"); + writer.key("params"); writer.value("string s, string sub"); + writer.key("returns"); writer.value("boolean"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/ToLowercase.java b/main/src/com/google/refine/expr/functions/strings/ToLowercase.java new file mode 100644 index 000000000..c3c33b1ca --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/ToLowercase.java @@ -0,0 +1,31 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class ToLowercase implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null) { + Object o = args[0]; + return (o instanceof String ? 
(String) o : o.toString()).toLowerCase(); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns s converted to lowercase"); + writer.key("params"); writer.value("string s"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/ToTitlecase.java b/main/src/com/google/refine/expr/functions/strings/ToTitlecase.java new file mode 100644 index 000000000..7a9cdb1c2 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/ToTitlecase.java @@ -0,0 +1,35 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.apache.commons.lang.WordUtils; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class ToTitlecase implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null) { + Object o = args[0]; + String s = o instanceof String ? 
(String) o : o.toString(); + + return WordUtils.capitalizeFully(s); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns s converted to titlecase"); + writer.key("params"); writer.value("string s"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } + +} diff --git a/main/src/com/google/refine/expr/functions/strings/ToUppercase.java b/main/src/com/google/refine/expr/functions/strings/ToUppercase.java new file mode 100644 index 000000000..df50a197f --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/ToUppercase.java @@ -0,0 +1,31 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class ToUppercase implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null) { + Object o = args[0]; + return (o instanceof String ? 
(String) o : o.toString()).toUpperCase(); + } + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " expects a string"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns s converted to uppercase"); + writer.key("params"); writer.value("string s"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Trim.java b/main/src/com/google/refine/expr/functions/strings/Trim.java new file mode 100644 index 000000000..df85edcc7 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Trim.java @@ -0,0 +1,30 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class Trim implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1) { + Object s1 = args[0]; + if (s1 != null && s1 instanceof String) { + return ((String) s1).trim(); + } + } + return null; + } + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns copy of the string, with leading and trailing whitespace omitted."); + writer.key("params"); writer.value("string s"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Unescape.java b/main/src/com/google/refine/expr/functions/strings/Unescape.java new file mode 100644 index 000000000..11a4ade85 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Unescape.java @@ -0,0 +1,53 @@ +package com.google.refine.expr.functions.strings; + +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.util.Properties; + +import 
org.apache.commons.lang.StringEscapeUtils; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.Function; + +public class Unescape implements Function { + + /** Unescapes the string args[0] according to the mode named by args[1] (compared in lower case); returns an EvalError for an unknown mode or an undecodable url string, and null when not given two non-null strings. */ public Object call(Properties bindings, Object[] args) { + if (args.length == 2) { + Object o1 = args[0]; + Object o2 = args[1]; + if (o1 != null && o2 != null && o1 instanceof String && o2 instanceof String) { + String s = (String) o1; + String mode = ((String) o2).toLowerCase(); + if ("html".equals(mode)) { + return StringEscapeUtils.unescapeHtml(s); + } else if ("xml".equals(mode)) { + return StringEscapeUtils.unescapeXml(s); + } else if ("csv".equals(mode)) { + return StringEscapeUtils.unescapeCsv(s); + } else if ("javascript".equals(mode)) { + return StringEscapeUtils.unescapeJavaScript(s); + } else if ("url".equals(mode)) { + try { + return URLDecoder.decode(s,"UTF-8"); + } catch (UnsupportedEncodingException e) { /* report the failure instead of silently falling through to null */ return new EvalError(ControlFunctionRegistry.getFunctionName(this) + ": UTF-8 decoding is not available"); } catch (IllegalArgumentException e) { /* URLDecoder rejects malformed %-escapes this way */ return new EvalError(ControlFunctionRegistry.getFunctionName(this) + ": cannot decode malformed url string: " + e.getMessage()); } + } else { + return new EvalError(ControlFunctionRegistry.getFunctionName(this) + " does not recognize mode '" + mode + "'."); + } + } + } + return null; + } + + /** Writes this function's description, parameter list and return type as one JSON object. */ public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Unescapes all escaped parts of the string depending on the given escaping mode."); + writer.key("params"); writer.value("string s, string mode ['html','xml','csv','url','javascript']"); + writer.key("returns"); writer.value("string"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/Unicode.java b/main/src/com/google/refine/expr/functions/strings/Unicode.java new file mode 100644 index 000000000..93b43ee01 --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/Unicode.java @@ -0,0 +1,34 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import 
org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class Unicode implements Function { + + /** Returns an Integer[] holding one unicode code point per character of args[0] (converted with toString() when it is not a String), or null when not given exactly one non-null argument. */ public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null) { + Object o = args[0]; + String s = (o instanceof String) ? (String) o : o.toString(); + Integer[] output = new Integer[s.codePointCount(0, s.length())]; + for (int i = 0, n = 0; i < s.length(); n++) { + output[n] = s.codePointAt(i); i += Character.charCount(output[n]); /* step over both halves of a surrogate pair so it yields a single entry */ + } + return output; + } + return null; + } + + /** Writes this function's description, parameter list and return type as one JSON object. */ public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns an array of numbers giving the unicode code point of each character of s"); + writer.key("params"); writer.value("string s"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/functions/strings/UnicodeType.java b/main/src/com/google/refine/expr/functions/strings/UnicodeType.java new file mode 100644 index 000000000..365e599dd --- /dev/null +++ b/main/src/com/google/refine/expr/functions/strings/UnicodeType.java @@ -0,0 +1,71 @@ +package com.google.refine.expr.functions.strings; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.gel.Function; + +public class UnicodeType implements Function { + + public Object call(Properties bindings, Object[] args) { + if (args.length == 1 && args[0] != null) { + Object o = args[0]; + String s = (o instanceof String) ? 
(String) o : o.toString(); + String[] output = new String[s.codePointCount(0, s.length())]; + for (int i = 0, n = 0; i < s.length(); n++) { + int cp = s.codePointAt(i); output[n] = translateType(Character.getType(cp)); i += Character.charCount(cp); /* one entry per code point, not per UTF-16 char */ + } + return output; + } + return null; + } + + /** Maps a java.lang.Character general-category value (Character.UNASSIGNED = 0 through Character.FINAL_QUOTE_PUNCTUATION = 30) to a readable name; any other value gives "unknown". */ private String translateType(int type) { + switch(type) { + case 0: return "unassigned"; + case 1: return "uppercase letter"; + case 2: return "lowercase letter"; + case 3: return "titlecase letter"; + case 4: return "modifier letter"; + case 5: return "other letter"; + case 6: return "non spacing mark"; + case 7: return "enclosing mark"; + case 8: return "combining spacing mark"; + case 9: return "decimal digit number"; + case 10: return "letter number"; + case 11: return "other number"; + case 12: return "space separator"; + case 13: return "line separator"; + case 14: return "paragraph separator"; + case 15: return "control"; + case 16: return "format"; + // 17 does not seem to be used + case 18: return "private use"; + case 19: return "surrogate"; + case 20: return "dash punctuation"; + case 21: return "start punctuation"; + case 22: return "end punctuation"; + case 23: return "connector punctuation"; + case 24: return "other punctuation"; + case 25: return "math symbol"; + case 26: return "currency symbol"; + case 27: return "modifier symbol"; + case 28: return "other symbol"; + case 29: return "initial quote punctuation"; + case 30: return "final quote punctuation"; + default: return "unknown"; + } + } + + /** Writes this function's description, parameter list and return type as one JSON object. */ public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value("Returns an array of strings describing the unicode type of each character of s"); + writer.key("params"); writer.value("string s"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/expr/util/CalendarParser.java b/main/src/com/google/refine/expr/util/CalendarParser.java new file mode 100644 index 
000000000..7e148225b --- /dev/null +++ b/main/src/com/google/refine/expr/util/CalendarParser.java @@ -0,0 +1,1941 @@ +package com.google.refine.expr.util; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Calendar; +import java.util.GregorianCalendar; +import java.util.TimeZone; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +// Taken from http://icecube.wisc.edu/~dglo/software/calparse/index.html +// Copyright Dave Glowacki. Released under the BSD license. + +/** + * Date parser state: the year/month/day ordering flags plus the individual date and time fields, each holding UNSET until assigned. + */ +class ParserState { + + /** bit indicating that the year comes before the month. */ + static final int YEAR_BEFORE_MONTH = 0x4; + /** bit indicating that the year comes before the day. */ + static final int YEAR_BEFORE_DAY = 0x2; + /** bit indicating that the month comes before the day. */ + static final int MONTH_BEFORE_DAY = 0x1; + + /** placeholder (no bit set) indicating that the year comes after the month. */ + static final int YEAR_AFTER_MONTH = 0x0; + /** placeholder (no bit set) indicating that the year comes after the day. */ + static final int YEAR_AFTER_DAY = 0x0; + /** placeholder (no bit set) indicating that the month comes after the day. */ + static final int MONTH_AFTER_DAY = 0x0; + + /** value indicating an unset variable. */ + static final int UNSET = Integer.MIN_VALUE; + + /** true if year should appear before month. */ + private boolean yearBeforeMonth; + /** true if year should appear before day. */ + private boolean yearBeforeDay; + /** true if month should appear before day. */ + private boolean monthBeforeDay; + + /** year. */ + private int year; + /** month (1-12, as accepted by setMonth; setCalendar converts it to Calendar's 0-11). */ + private int month; + /** day of month. */ + private int day; + /** hour (0-23). */ + private int hour; + /** minute (0-59). */ + private int minute; + /** second (0-59). */ + private int second; + /** millisecond (0-999). */ + private int milli; + + /** true if time is after noon. 
*/ + private boolean timePostMeridian; + + /** time zone (use default time zone if this is null). */ + private TimeZone timeZone; + + /** + * Create parser state for the specified order; every date and time field starts out UNSET. + * + * @param order + * YY_MM_DD, MM_DD_YY, etc. + */ + ParserState(int order) { + yearBeforeMonth = (order & YEAR_BEFORE_MONTH) == YEAR_BEFORE_MONTH; + yearBeforeDay = (order & YEAR_BEFORE_DAY) == YEAR_BEFORE_DAY; + monthBeforeDay = (order & MONTH_BEFORE_DAY) == MONTH_BEFORE_DAY; + + year = UNSET; + month = UNSET; + day = UNSET; + hour = UNSET; + minute = UNSET; + second = UNSET; milli = UNSET; /* otherwise milli keeps the int default 0 and isMillisecondSet() reports true before anything was parsed */ + timePostMeridian = false; + } + + /** + * Get day of month. + * + * @return day of month + */ + int getDate() { + return day; + } + + /** + * Get hour. + * + * @return hour + */ + int getHour() { + return hour; + } + + /** + * Get millisecond. + * + * @return millisecond + */ + int getMillisecond() { + return milli; + } + + /** + * Get minute. + * + * @return minute + */ + int getMinute() { + return minute; + } + + /** + * Get month. + * + * @return month + */ + int getMonth() { + return month; + } + + /** + * Get second. + * + * @return second + */ + int getSecond() { + return second; + } + + /** + * Get time zone. + * + * @return time zone (null if none was specified) + */ + TimeZone getTimeZone() { + return timeZone; + } + + /** + * Get year. + * + * @return year + */ + int getYear() { + return year; + } + + /** + * Is day of month value set? + * + * @return true if a value has been assigned + */ + boolean isDateSet() { + return (day != UNSET); + } + + /** + * Is hour value set? + * + * @return true if a value has been assigned + */ + boolean isHourSet() { + return (hour != UNSET); + } + + /** + * Is millisecond value set? + * + * @return true if a value has been assigned + */ + boolean isMillisecondSet() { + return (milli != UNSET); + } + + /** + * Is minute value set? 
+ * + * @return true if a value has been assigned + */ + boolean isMinuteSet() { + return (minute != UNSET); + } + + /** + * Is a numeric month placed before a numeric day of month? + * + * @return true if month is before day of month + */ + boolean isMonthBeforeDay() { + return monthBeforeDay; + } + + /** + * Is month value set? + * + * @return true if a value has been assigned + */ + boolean isMonthSet() { + return (month != UNSET); + } + + /** + * Is second value set? + * + * @return true if a value has been assigned + */ + boolean isSecondSet() { + return (second != UNSET); + } + + /** + * Is the time post-meridian (i.e. afternoon)? + * + * @return true if time is P.M. + */ + boolean isTimePostMeridian() { + return (timePostMeridian || hour > 12); + } + + /** + * Is a numeric year placed before a numeric day of month? + * + * @return true if year is before day of month + */ + boolean isYearBeforeDay() { + return yearBeforeDay; + } + + /** + * Is a numeric year placed before a numeric month? + * + * @return true if year is before month + */ + boolean isYearBeforeMonth() { + return yearBeforeMonth; + } + + /** + * Is year value set? + * + * @return true if a value has been assigned + */ + boolean isYearSet() { + return (year != UNSET); + } + + /** + * Fill the calendar with the parsed date. 
+ * + * @param cal + * calendar to fill (it is cleared first) + * @param ignoreChanges + * if false, throw an exception when a date like + * Sept 31 is changed to Oct 1; if true, accept the adjusted date + * + * @throws CalendarParserException + * if the date cannot be set for some reason + */ + void setCalendar(GregorianCalendar cal, boolean ignoreChanges) + throws CalendarParserException { + cal.clear(); + if (year != UNSET && month != UNSET && day != UNSET) { + cal.set(Calendar.YEAR, year); + cal.set(Calendar.MONTH, month - 1); /* month is held 1-based (setMonth accepts 1-12); Calendar.MONTH is 0-based */ + cal.set(Calendar.DATE, day); + + if (!ignoreChanges) { /* read the fields back to detect a date the calendar rolled over */ + final int calYear = cal.get(Calendar.YEAR); + final int calMonth = cal.get(Calendar.MONTH); + final int calDay = cal.get(Calendar.DATE); + + if (calYear != year || (calMonth + 1) != month || calDay != day) { + throw new CalendarParserException("Date was set to " + + calYear + "/" + (calMonth + 1) + "/" + calDay + + " not requested " + year + "/" + month + "/" + + day); + } + } + } + + cal.clear(Calendar.HOUR); + cal.clear(Calendar.MINUTE); + cal.clear(Calendar.SECOND); + cal.clear(Calendar.MILLISECOND); + + if (hour != UNSET && minute != UNSET) { /* time and time zone are applied only when both hour and minute were parsed */ + cal.set(Calendar.HOUR, hour); /* NOTE(review): setHour validates 0-23 but Calendar.HOUR is the 12-hour field; HOUR_OF_DAY looks intended - confirm */ + cal.set(Calendar.MINUTE, minute); + if (second != UNSET) { + cal.set(Calendar.SECOND, second); + if (milli != UNSET) { + cal.set(Calendar.MILLISECOND, milli); + } + } + + if (timeZone != null) { + cal.setTimeZone(timeZone); + } + } + } + + /** + * Set the day of month value. + * + * @param val + * day of month value + * + * @throws CalendarParserException + * if the value is not a valid day of month + */ + void setDate(int val) throws CalendarParserException { + if (val < 1 || val > 31) { + throw new CalendarParserException("Bad day " + val); + } + + day = val; + } + + /** + * Set the hour value. 
+ * + * @param val + * hour value + * + * @throws CalendarParserException + * if the value is not a valid hour + */ + void setHour(int val) throws CalendarParserException { + final int tmpHour; + if (timePostMeridian) { + tmpHour = val + 12; + timePostMeridian = false; + } else { + tmpHour = val; + } + + if (tmpHour < 0 || tmpHour > 23) { + throw new CalendarParserException("Bad hour " + val); + } + + hour = tmpHour; + } + + /** + * Set the millisecond value. + * + * @param val + * millisecond value + * + * @throws CalendarParserException + * if the value is not a valid millisecond + */ + void setMillisecond(int val) throws CalendarParserException { + if (val < 0 || val > 999) { + throw new CalendarParserException("Bad millisecond " + val); + } + + milli = val; + } + + /** + * Set the minute value. + * + * @param val + * minute value + * + * @throws CalendarParserException + * if the value is not a valid minute + */ + void setMinute(int val) throws CalendarParserException { + if (val < 0 || val > 59) { + throw new CalendarParserException("Bad minute " + val); + } + + minute = val; + } + + /** + * Set the month value. + * + * @param val + * month value + * + * @throws CalendarParserException + * if the value is not a valid month + */ + void setMonth(int val) throws CalendarParserException { + if (val < 1 || val > 12) { + throw new CalendarParserException("Bad month " + val); + } + + month = val; + } + + /** + * Set the second value. + * + * @param val + * second value + * + * @throws CalendarParserException + * if the value is not a valid second + */ + void setSecond(int val) throws CalendarParserException { + if (val < 0 || val > 59) { + throw new CalendarParserException("Bad second " + val); + } + + second = val; + } + + /** + * Set the AM/PM indicator value. + * + * @param val + * true if time represented is after noon + */ + void setTimePostMeridian(boolean val) { + timePostMeridian = val; + } + + /** + * Set the time zone. 
+ * + * @param tz + * time zone + */ + void setTimeZone(TimeZone tz) { + timeZone = tz; + } + + /** + * Set the year value. + * + * @param val + * year value + * + * @throws CalendarParserException + * if the value is not a valid year + */ + void setYear(int val) throws CalendarParserException { + if (val < 0) { + throw new CalendarParserException("Bad year " + val); + } + + year = val; + } +} + +/** + * A parser for arbitrary date/time strings. + */ +public class CalendarParser { + /** bit indicating that the year comes before the month. */ + public static final int YEAR_BEFORE_MONTH = ParserState.YEAR_BEFORE_MONTH; + /** bit indicating that the year comes before the day. */ + public static final int YEAR_BEFORE_DAY = ParserState.YEAR_BEFORE_DAY; + /** bit indicating that the month comes before the day. */ + public static final int MONTH_BEFORE_DAY = ParserState.MONTH_BEFORE_DAY; + + /** bit indicating that the year comes after the month. */ + public static final int YEAR_AFTER_MONTH = ParserState.YEAR_AFTER_MONTH; + /** bit indicating that the year comes after the day. */ + public static final int YEAR_AFTER_DAY = ParserState.YEAR_AFTER_DAY; + /** bit indicating that the month comes after the day. */ + public static final int MONTH_AFTER_DAY = ParserState.MONTH_AFTER_DAY; + + /** day/month/year order. */ + public static final int DD_MM_YY = YEAR_AFTER_MONTH | YEAR_AFTER_DAY + | MONTH_AFTER_DAY; + /** month/day/year order. */ + public static final int MM_DD_YY = YEAR_AFTER_MONTH | YEAR_AFTER_DAY + | MONTH_BEFORE_DAY; + /** month/year/day order. */ + public static final int MM_YY_DD = YEAR_AFTER_MONTH | YEAR_BEFORE_DAY + | MONTH_BEFORE_DAY; + /** day/year/month order. */ + public static final int DD_YY_MM = YEAR_BEFORE_MONTH | YEAR_AFTER_DAY + | MONTH_AFTER_DAY; + /** year/day/month order. */ + public static final int YY_DD_MM = YEAR_BEFORE_MONTH | YEAR_BEFORE_DAY + | MONTH_AFTER_DAY; + /** year/month/day order. 
*/ + public static final int YY_MM_DD = YEAR_BEFORE_MONTH | YEAR_BEFORE_DAY + | MONTH_BEFORE_DAY; + + /** list of time zone names. */ + private static final String[] zoneNames = loadTimeZoneNames(); + + /** Unknown place in time parsing. */ + private static final int PLACE_UNKNOWN = 0; + /** Parsing hour value from time string. */ + private static final int PLACE_HOUR = 1; + /** Parsing minute value from time string. */ + private static final int PLACE_MINUTE = 2; + /** Parsing second value from time string. */ + private static final int PLACE_SECOND = 3; + /** Parsing millisecond value from time string. */ + private static final int PLACE_MILLI = 4; + + /** Adjustment for two-digit years will break in 2050. */ + private static final int CENTURY_OFFSET = 2000; + + /** value indicating an unset variable. */ + private static final int UNSET = ParserState.UNSET; + + /** set to true to enable debugging. */ + private static final boolean DEBUG = false; + + /** list of weekday names. */ + private static final String[] WEEKDAY_NAMES = { "sunday", "monday", + "tuesday", "wednesday", "thursday", "friday", "saturday", }; + + /** list of month abbreviations and names. */ + private static final String[][] MONTHS = { { "jan", "January" }, + { "feb", "February" }, { "mar", "March" }, { "apr", "April" }, + { "may", "May" }, { "jun", "June" }, { "jul", "July" }, + { "aug", "August" }, { "sep", "September" }, { "oct", "October" }, + { "nov", "November" }, { "dec", "December" }, }; + + /** + * Append formatted time string to the string buffer. 
+ * + * @param buf + * string buffer + * @param cal + * object containing time + * @param needSpace + * true if a space character should be inserted before + * any data + */ + private static final void appendTimeString(StringBuffer buf, Calendar cal, boolean needSpace) { + final int hour = cal.get(Calendar.HOUR_OF_DAY); + final int minute = cal.get(Calendar.MINUTE); + final int second = cal.get(Calendar.SECOND); + final int milli = cal.get(Calendar.MILLISECOND); + + if (hour != 0 || minute != 0 || second != 0 || milli != 0) { + if (needSpace) { + buf.append(' '); + } + if (hour < 10) { + buf.append(' '); + } + buf.append(hour); + + if (minute < 10) { + buf.append(":0"); + } else { + buf.append(':'); + } + buf.append(minute); + + if (second != 0 || milli != 0) { + if (second < 10) { + buf.append(":0"); + } else { + buf.append(':'); + } + buf.append(second); + + if (milli != 0) { + if (milli < 10) { + buf.append(".00"); + } else if (milli < 100) { + buf.append(".0"); + } else { + buf.append('.'); + } + buf.append(milli); + } + } + } + + TimeZone tz = cal.getTimeZone(); + if (tz.getRawOffset() == 0) { + buf.append(" GMT"); + } else { + buf.append(' '); + + int offset = tz.getRawOffset() / (60 * 1000); + if (offset < 0) { + buf.append('-'); + offset = -offset; + } else { + buf.append('+'); + } + + int hrOff = offset / 60; + if (hrOff < 10) { + buf.append('0'); + } + buf.append(hrOff); + buf.append(':'); + + int minOff = offset % 60; + if (minOff < 10) { + buf.append('0'); + } + buf.append(minOff); + } + } + + /** + * Return a string representation of the order value. + * + * @param order + * order + * + * @return order string + */ + public static final String getOrderString(int order) { + switch (order) { + case DD_MM_YY: + return "DD_MM_YY"; + case MM_DD_YY: + return "MM_DD_YY"; + case MM_YY_DD: + return "MM_YY_DD"; + case DD_YY_MM: + return "DD_YY_MM"; + case YY_DD_MM: + return "YY_DD_MM"; + case YY_MM_DD: + return "YY_MM_DD"; + default: + break; + } + + return "??" 
+ order + "??"; + } + + /** + * Translate a string representation of an ordinal number to the appropriate + * numeric value.
+ * For example, "1st" would return 1, "23rd" + * would return 23, etc. + * + * @param str + * ordinal string + * + * @return the numeric value of the ordinal number, or + * CalendarParser.UNSET if the supplied string is not a + * valid ordinal number. + */ + private static final int getOrdinalNumber(String str) { + final int len = (str == null ? 0 : str.length()); + if (len >= 3) { + + String suffix = str.substring(len - 2); + if (suffix.equalsIgnoreCase("st") || suffix.equalsIgnoreCase("nd") + || suffix.equalsIgnoreCase("rd") + || suffix.equalsIgnoreCase("th")) { + try { + return Integer.parseInt(str.substring(0, len - 2)); + } catch (NumberFormatException nfe) { + // fall through if number was not parsed + } + } + } + + return UNSET; + } + + /** + * Get name of current place in time. + * + * @param place + * place ID + * + * @return place name ("hour", "minute", etc. + */ + private static final String getTimePlaceString(int place) { + switch (place) { + case PLACE_HOUR: + return "hour"; + case PLACE_MINUTE: + return "minute"; + case PLACE_SECOND: + return "second"; + case PLACE_MILLI: + return "millisecond"; + default: + break; + } + + return "unknown"; + } + + /** + * Determine is the supplied string is a value weekday name. + * + * @param str + * weekday name to check + * + * @return true if the supplied string is a weekday name. + */ + private static final boolean isWeekdayName(String str) { + if (str == null || str.length() < 3) { + return false; + } + + String lstr = str.toLowerCase(); + for (int i = 0; i < WEEKDAY_NAMES.length; i++) { + if (lstr.startsWith(WEEKDAY_NAMES[i]) + || WEEKDAY_NAMES[i].toLowerCase().startsWith(lstr)) { + return true; + } + } + + return false; + } + + /** + * Load list of time zones if sun.util.calendar.ZoneInfo exists. + * + * @return null if time zone list cannot be loaded. 
+ */ + private static final String[] loadTimeZoneNames() { + Class zoneInfo; + try { + zoneInfo = Class.forName("sun.util.calendar.ZoneInfo"); + } catch (ClassNotFoundException cnfe) { + return null; + } + + Method method; + try { + method = zoneInfo.getDeclaredMethod("getAvailableIDs", new Class[0]); + } catch (NoSuchMethodException nsme) { + return null; + } + + Object result; + try { + result = method.invoke((Object) null); + } catch (IllegalAccessException iae) { + return null; + } catch (InvocationTargetException ite) { + return null; + } + + String[] tmpList = (String[]) result; + + int numSaved = 0; + String[] finalList = null; + + for (int i = 0; i < 2; i++) { + if (i > 0) { + if (numSaved == 0) { + return null; + } + + finalList = new String[numSaved]; + numSaved = 0; + } + + for (int j = 0; j < tmpList.length; j++) { + final int len = tmpList[j].length(); + if ((len > 2 && Character.isUpperCase(tmpList[j].charAt(1))) + && (len != 7 || !Character.isDigit(tmpList[j].charAt(3)))) { + if (finalList == null) { + numSaved++; + } else { + finalList[numSaved++] = tmpList[j]; + } + + if (len == 3 && tmpList[j].charAt(1) == 'S' + && tmpList[j].charAt(2) == 'T') { + if (finalList == null) { + numSaved++; + } else { + StringBuffer dst = new StringBuffer(); + dst.append(tmpList[j].charAt(0)); + dst.append("DT"); + finalList[numSaved++] = dst.toString(); + } + } + } + } + } + + return finalList; + } + + /** + * Convert the supplied month name to its numeric representation.
+ * For example, "January" (or any substring) would return + * 1 and "December" would return 12. + * + * @param str + * month name + * + * @return the numeric month, or CalendarParser.UNSET if the + * supplied string is not a valid month name. + */ + public static int monthNameToNumber(String str) { + if (str != null && str.length() >= 3) { + String lstr = str.toLowerCase(); + for (int i = 0; i < MONTHS.length; i++) { + if (lstr.startsWith(MONTHS[i][0]) + || MONTHS[i][1].toLowerCase().startsWith(lstr)) { + return i + 1; + } + } + } + + return UNSET; + } + + /** + * Extract a date from a string, defaulting to YY-MM-DD order for + * all-numeric strings. + * + * @param dateStr + * date string + * + * @return parsed date + * + * @throws CalendarParserException + * if there was a problem parsing the string. + */ + public static final Calendar parse(String dateStr) + throws CalendarParserException { + return parse(dateStr, YY_MM_DD); + } + + /** + * Extract a date from a string. + * + * @param dateStr + * date string + * @param order + * order in which pieces of numeric strings are assigned (should + * be one of YY_MM_DD, MM_DD_YY, etc.) + * + * @return parsed date + * + * @throws CalendarParserException + * if there was a problem parsing the string. + */ + public static final Calendar parse(String dateStr, int order) + throws CalendarParserException { + return parse(dateStr, order, true); + } + + /** + * Extract a date from a string. + * + * @param dateStr + * date string + * @param order + * order in which pieces of numeric strings are assigned (should + * be one of YY_MM_DD, MM_DD_YY, etc.) + * @param ignoreChanges + * if true, ignore date changes such as Feb 31 + * being changed to Mar 3. + * + * @return parsed date + * + * @throws CalendarParserException + * if there was a problem parsing the string. 
+ */ + public static final Calendar parse(String dateStr, int order, + boolean ignoreChanges) throws CalendarParserException { + if (dateStr == null) { + return null; + } + + return parseString(dateStr, order, ignoreChanges); + } + + /** + * Parse a non-numeric token from the date string. + * + * @param dateStr + * full date string + * @param state + * parser state + * @param token + * string being parsed + * + * @throws CalendarParserException + * if there was a problem parsing the token + */ + private static final void parseNonNumericToken(String dateStr, + ParserState state, String token) throws CalendarParserException { + // if it's a weekday name, ignore it + if (isWeekdayName(token)) { + if (DEBUG) { + System.err.println("IGNORE \"" + token + "\" (weekday)"); + } + return; + } + + // if it looks like a time, deal with it + if (token.indexOf(':') > 0) { + final char firstChar = token.charAt(0); + if (Character.isDigit(firstChar)) { + parseTime(dateStr, state, token); + return; + } else if (firstChar == '+' || firstChar == '-') { + parseTimeZoneOffset(dateStr, state, token); + return; + } else { + throw new CalendarParserException("Unrecognized time \"" + + token + "\" in date \"" + dateStr + "\""); + } + } + + // try to parse month name + int tmpMon = monthNameToNumber(token); + + // if token isn't a month name ... 
PUKE + if (tmpMon != UNSET) { + + // if month number is unset, set it and move on + if (!state.isMonthSet()) { + state.setMonth(tmpMon); + if (DEBUG) { + System.err.println("MONTH=" + + MONTHS[state.getMonth() - 1][0] + " (" + token + + ") name"); + } + return; + } + + // try to move the current month value to the year or day + if (!state.isYearSet()) { + if (state.isDateSet() || state.isYearBeforeDay()) { + state.setYear(state.getMonth()); + state.setMonth(tmpMon); + if (DEBUG) { + System.err.println("MONTH=" + + MONTHS[state.getMonth() - 1][0] + ", YEAR=" + + state.getYear() + " (" + token + + ") name swap"); + } + } else { + state.setDate(state.getMonth()); + state.setMonth(tmpMon); + if (DEBUG) { + System.err.println("MONTH=" + + MONTHS[state.getMonth() - 1][0] + ", DAY=" + + state.getDate() + " (" + token + + ") name swap"); + } + } + + return; + } + + // year was already set, so try to move month value to day + if (!state.isDateSet()) { + state.setDate(state.getMonth()); + state.setMonth(tmpMon); + if (DEBUG) { + System.err.println("MONTH=" + + MONTHS[state.getMonth() - 1][0] + ", DAY=" + + state.getDate() + " (" + token + ") name swap 2"); + } + + return; + } + + // can't move month value to year or day ... PUKE + if (DEBUG) { + System.err.println("*** Too many numbers in \"" + dateStr + + "\""); + } + throw new CalendarParserException("Too many numbers in" + + " date \"" + dateStr + "\""); + } + + // maybe it's an ordinal number list "1st", "23rd", etc. 
+ int val = getOrdinalNumber(token); + if (val == UNSET) { + final String lToken = token.toLowerCase(); + + if (lToken.equals("am")) { + // don't need to do anything + if (DEBUG) { + System.err.println("TIME=AM (" + token + ")"); + } + return; + } else if (lToken.equals("pm")) { + if (!state.isHourSet()) { + state.setTimePostMeridian(true); + } else { + state.setHour(state.getHour() + 12); + } + + if (DEBUG) { + System.err.println("TIME=PM (" + token + ")"); + } + return; + } else if (zoneNames != null) { + // maybe it's a time zone name + for (int z = 0; z < zoneNames.length; z++) { + if (token.equalsIgnoreCase(zoneNames[z])) { + TimeZone tz = TimeZone.getTimeZone(token); + if (tz.getRawOffset() != 0 || lToken.equals("gmt")) { + state.setTimeZone(tz); + return; + } + } + } + } + + if (DEBUG) { + System.err.println("*** Unknown string \"" + token + "\""); + } + throw new CalendarParserException("Unknown string \"" + token + + "\" in date \"" + dateStr + "\""); + } + + // if no day yet, we're done + if (!state.isDateSet()) { + state.setDate(val); + if (DEBUG) { + System.err.println("DAY=" + state.getDate() + " (" + token + + ") ord"); + } + return; + } + + // if either year or month is unset... 
+ if (!state.isYearSet() || !state.isMonthSet()) { + + // if day can't be a month, shift it into year + if (state.getDate() > 12) { + if (!state.isYearSet()) { + state.setYear(state.getDate()); + state.setDate(val); + if (DEBUG) { + System.err.println("YEAR=" + state.getYear() + ", DAY=" + + state.getDate() + " (" + token + + ") ord>12 swap"); + } + return; + } + + // year was already set, maybe we can move it to month + if (state.getYear() <= 12) { + state.setMonth(state.getYear()); + state.setYear(state.getDate()); + state.setDate(val); + + if (DEBUG) { + System.err.println("YEAR=" + state.getYear() + + ", MONTH=" + state.getMonth() + ", DAY=" + + state.getDate() + " (" + token + + ") ord megaswap"); + } + + return; + } + + // try to shift day value to either year or month + } else if (!state.isYearSet()) { + if (!state.isMonthSet() && !state.isYearBeforeMonth()) { + state.setMonth(state.getDate()); + state.setDate(val); + if (DEBUG) { + System.err.println("MONTH=" + state.getMonth() + + ", DAY=" + state.getDate() + " (" + token + + ") ord swap"); + } + return; + } + + state.setYear(state.getDate()); + state.setDate(val); + if (DEBUG) { + System.err.println("YEAR=" + state.getYear() + ", DAY=" + + state.getDate() + " (" + token + ") ord swap"); + } + return; + + // year was set, so we know month is unset + } else { + + state.setMonth(state.getDate()); + state.setDate(val); + if (DEBUG) { + System.err.println("MONTH=" + state.getMonth() + ", DAY=" + + state.getDate() + " (" + token + ") ord swap#2"); + } + return; + } + } + + if (DEBUG) { + System.err.println("*** Extra number \"" + token + "\""); + } + throw new CalendarParserException("Cannot assign ordinal in \"" + + dateStr + "\""); + } + + /** + * Split a large numeric value into a year/month/date values. 
+ * + * @param dateStr + * full date string + * @param state + * parser state + * @param val + * numeric value to use + * + * @throws CalendarParserException + * if there was a problem splitting the value + */ + private static final void parseNumericBlob(String dateStr, + ParserState state, int val) throws CalendarParserException { + if (state.isYearSet() || state.isMonthSet() || state.isDateSet()) { + throw new CalendarParserException("Unknown value " + val + + " in date \"" + dateStr + "\""); + } + + int tmpVal = val; + if (state.isYearBeforeMonth()) { + if (state.isYearBeforeDay()) { + final int last = tmpVal % 100; + tmpVal /= 100; + + final int middle = tmpVal % 100; + tmpVal /= 100; + + state.setYear(tmpVal); + if (state.isMonthBeforeDay()) { + // YYYYMMDD + state.setMonth(middle); + state.setDate(last); + } else { + // YYYYDDMM + state.setDate(middle); + state.setMonth(last); + } + } else { + // DDYYYYMM + state.setMonth(tmpVal % 100); + tmpVal /= 100; + + state.setYear(tmpVal % 10000); + tmpVal /= 10000; + + state.setDate(tmpVal); + } + } else if (state.isYearBeforeDay()) { + // MMYYYYDD + state.setDate(tmpVal % 100); + tmpVal /= 100; + + state.setYear(tmpVal % 10000); + tmpVal /= 10000; + + state.setMonth(tmpVal); + } else { + state.setYear(tmpVal % 10000); + tmpVal /= 10000; + + final int middle = tmpVal % 100; + tmpVal /= 100; + if (state.isMonthBeforeDay()) { + // MMDDYYYY + state.setDate(middle); + state.setMonth(tmpVal); + } else { + // DDMMYYYY + state.setDate(tmpVal); + state.setMonth(middle); + } + } + + if (DEBUG) { + System.err.println("YEAR=" + state.getYear() + " MONTH=" + + state.getMonth() + " DAY=" + state.getDate() + " (" + val + + ") blob"); + } + } + + /** + * Use a numeric token from the date string. 
     *
     * The value is filed by magnitude: above 9999 it is split as a packed
     * date, above 31 it can only be a year, above 12 it is a year or a
     * day, and anything else is ambiguous and is assigned from the fields
     * already set and the order flags of the parser state. Earlier guesses
     * are moved to another field when the new value shows they were wrong.
     *
     * @param dateStr
     *            full date string
     * @param state
     *            parser state
     * @param val
     *            numeric value to use
     *
     * @throws CalendarParserException
     *             if there was a problem parsing the token
     */
    private static final void parseNumericToken(String dateStr,
            ParserState state, int val) throws CalendarParserException {
        // reject a fourth number once year, month and day are all set
        if (state.isYearSet() && state.isMonthSet() && state.isDateSet()) {
            if (DEBUG) {
                System.err.println("*** Extra number " + val);
            }
            throw new CalendarParserException("Extra value \"" + val
                    + "\" in date \"" + dateStr + "\"");
        }

        // reject negative numbers
        if (val < 0) {
            if (DEBUG) {
                System.err.println("*** Negative number " + val);
            }
            throw new CalendarParserException("Found negative number in"
                    + " date \"" + dateStr + "\"");
        }

        // five or more digits: treat as a packed year/month/day value
        if (val > 9999) {
            parseNumericBlob(dateStr, state, val);
            return;
        }

        // deal with obvious years first
        if (val > 31) {

            // if no year yet, assign it and move on
            if (!state.isYearSet()) {
                state.setYear(val);
                if (DEBUG) {
                    System.err.println("YEAR=" + state.getYear() + " (" + val
                            + ") >31");
                }
                return;
            }

            // fail if the stored year can't possibly be a day or month:
            // then there are two year candidates
            if (state.getYear() > 31) {
                if (DEBUG) {
                    System.err.println("*** Ambiguous year " + state.getYear()
                            + " vs. " + val);
                }
                String errMsg = "Couldn't decide on year number in date \""
                        + dateStr + "\"";
                throw new CalendarParserException(errMsg);
            }

            // if the stored year value can't be a month (13..31)...
            if (state.getYear() > 12) {

                // if day isn't set, use old val as day and new val as year
                if (!state.isDateSet()) {
                    state.setDate(state.getYear());
                    state.setYear(val);

                    if (DEBUG) {
                        System.err.println("YEAR=" + state.getYear() + ", DAY="
                                + state.getDate() + " (" + val + ") >31 swap");
                    }

                    return;
                }

                // NOTE: both day and year are set

                // try using day value as month so we can move year
                // value to day and use new value as year
                if (state.getDate() <= 12) {
                    state.setMonth(state.getDate());
                    state.setDate(state.getYear());
                    state.setYear(val);

                    if (DEBUG) {
                        System.err.println("YEAR=" + state.getYear()
                                + ", MONTH=" + state.getMonth() + ", DAY="
                                + state.getDate() + " (" + val
                                + ") >31 megaswap");
                    }

                    return;
                }

                if (DEBUG) {
                    System.err.println("*** Unassignable year-like"
                            + " number " + val);
                }
                throw new CalendarParserException("Bad number " + val
                        + " found in date \"" + dateStr + "\"");
            }

            // NOTE: year <= 12, so the stored year could be a month or day

            if (!state.isDateSet() && !state.isMonthSet()) {
                // both slots are free: the order flag decides which one
                // receives the old year value
                if (state.isMonthBeforeDay()) {
                    state.setMonth(state.getYear());
                    state.setYear(val);
                    if (DEBUG) {
                        System.err.println("YEAR=" + state.getYear()
                                + ", MONTH=" + state.getMonth() + " (" + val
                                + ") >31 swap");
                    }
                } else {
                    state.setDate(state.getYear());
                    state.setYear(val);
                    if (DEBUG) {
                        System.err
                                .println("YEAR=" + state.getYear() + ", DAY="
                                        + state.getDate() + " (" + val
                                        + ") >31 swap#2");
                    }
                }

                return;
            }

            // month is set but day is not: the old year becomes the day
            if (!state.isDateSet()) {
                state.setDate(state.getYear());
                state.setYear(val);
                if (DEBUG) {
                    System.err.println("YEAR=" + state.getYear() + ", DAY="
                            + state.getDate() + " (" + val + ") >31 day swap");
                }
                return;
            }

            // assume this was a mishandled month
            state.setMonth(state.getYear());
            state.setYear(val);

            if (DEBUG) {
                System.err.println("YEAR=" + state.getYear() + ", MONTH="
                        + state.getMonth() + " (" + val + ") >31 mon swap");
            }

            return;
        }

        // now deal with non-month values (13..31)
        if (val > 12) {

            // if no year value yet...
            if (!state.isYearSet()) {

                // if the day is set, or if we assign year before day...
                if (state.isDateSet() || state.isYearBeforeDay()) {
                    state.setYear(val);
                    if (DEBUG) {
                        System.err.println("YEAR=" + state.getYear() + " ("
                                + val + ") >12");
                    }
                } else {
                    state.setDate(val);
                    if (DEBUG) {
                        System.err.println("DAY=" + state.getDate() + " ("
                                + val + ") >12");
                    }
                }

                return;
            }

            // NOTE: year is set

            // if no day value yet, assign it and move on
            if (!state.isDateSet()) {
                state.setDate(val);

                if (DEBUG) {
                    System.err.println("DAY=" + state.getDate() + " (" + val
                            + ") >12 !yr");
                }

                return;
            }

            // NOTE: both year and day are set

            // XXX see if we can shift things around

            if (DEBUG) {
                System.err.println("*** Unassignable year/day number " + val);
            }
            throw new CalendarParserException("Bad number " + val
                    + " found in date \"" + dateStr + "\"");
        }

        // NOTE: ambiguous value (0..12): could be year, month or day

        // if year is set, this must be either the month or day
        if (state.isYearSet()) {
            if (state.isMonthSet()
                    || (!state.isDateSet() && !state.isMonthBeforeDay())) {
                state.setDate(val);
                if (DEBUG) {
                    System.err.println("DAY=" + state.getDate() + " (" + val
                            + ") ambig!yr");
                }
            } else {
                state.setMonth(val);
                if (DEBUG) {
                    System.err.println("MONTH=" + state.getMonth() + " (" + val
                            + ") ambig!yr");
                }
            }

            return;
        }

        // NOTE: year not set

        // if month is set, this must be either the year or day
        if (state.isMonthSet()) {
            if (state.isDateSet() || state.isYearBeforeDay()) {
                state.setYear(val);
                if (DEBUG) {
                    System.err.println("YEAR=" + state.getYear() + " (" + val
                            + ") ambig!mo");
                }
            } else {
                state.setDate(val);
                if (DEBUG) {
                    System.err.println("DAY=" + state.getDate() + " (" + val
                            + ") ambig!mo");
                }
            }

            return;
        }

        // NOTE: neither year nor month is set

        // if day is set, this must be either the year or month
        if (state.isDateSet()) {
            if (state.isYearBeforeMonth()) {
                state.setYear(val);
                if (DEBUG) {
                    System.err.println("YEAR=" + state.getYear() + " (" + val
                            + ") ambig!day");
                }
            } else {
                state.setMonth(val);
                if (DEBUG) {
                    System.err.println("MONTH=" + state.getMonth() + " (" + val
                            + ") ambig!day");
                }
            }

            return;
        }

        // NOTE: no value set yet, so the order flags alone pick the field
        if (state.isYearBeforeMonth()) {
            if (state.isYearBeforeDay()) {
                state.setYear(val);
                if (DEBUG) {
                    System.err.println("YEAR=" + state.getYear() + " (" + val
                            + ") YM|YD");
                }
            } else {
                state.setDate(val);
                if (DEBUG) {
                    System.err.println("DAY=" + state.getDate() + " (" + val
                            + ") YM!YD");
                }
            }
        } else if (state.isMonthBeforeDay()) {
            state.setMonth(val);
            if (DEBUG) {
                System.err.println("MONTH=" + state.getMonth() + " (" + val
                        + ") !YM|MD");
            }
        } else {
            state.setDate(val);
            if (DEBUG) {
                System.err.println("DAY=" + state.getDate() + " (" + val
                        + ") !YM!MD");
            }
        }
    }

    /**
     * Extract a date from the supplied string.
     *
     * @param dateStr
     *            string to parse
     * @param order
     *            year/month/day order (YY_MM_DD, MM_DD_YY, etc.)
     * @param ignoreChanges
     *            if true, ignore date changes such as Feb 31
     *            being changed to Mar 3.
     *
     * @return parsed date
     *
     * @throws CalendarParserException
     *             if no valid date was found.
+ */ + private static final Calendar parseString(String dateStr, int order, + boolean ignoreChanges) throws CalendarParserException { + ParserState state = new ParserState(order); + + Pattern pat = Pattern.compile("([\\s/,]+|(\\S)\\-)"); + Matcher matcher = pat.matcher(dateStr); + + int prevEnd = 0; + while (prevEnd < dateStr.length()) { + String token; + if (!matcher.find()) { + token = dateStr.substring(prevEnd); + prevEnd = dateStr.length(); + } else { + final boolean isMinus = (matcher.groupCount() == 2 && matcher + .group(2) != null); + + if (!isMinus) { + token = dateStr.substring(prevEnd, matcher.start()); + } else { + token = dateStr.substring(prevEnd, matcher.start()) + + matcher.group(2); + } + + prevEnd = matcher.end(); + } + + if (DEBUG) { + System.err.println("YEAR " + + (state.isYearSet() ? Integer + .toString(state.getYear()) : "UNSET") + + ", MONTH " + + (state.isMonthSet() ? Integer.toString(state + .getMonth()) : "UNSET") + + ", DAY " + + (state.isDateSet() ? Integer + .toString(state.getDate()) : "UNSET") + + ", TOKEN=\"" + token + "\""); + } + + // try to decipher next token as a number + try { + final int val = Integer.parseInt(token); + parseNumericToken(dateStr, state, val); + } catch (NumberFormatException e) { + parseNonNumericToken(dateStr, state, token); + } + } + + // before checking for errors, check for missing year + if (!state.isDateSet() && state.getYear() <= 31) { + int tmp = state.getDate(); + state.setDate(state.getYear()); + state.setYear(tmp); + } + + if (!state.isDateSet()) { + if (!state.isMonthSet()) { + if (!state.isYearSet()) { + throw new CalendarParserException("No date found in \"" + + dateStr + "\""); + } else { + throw new CalendarParserException("Day and month missing" + + " from \"" + dateStr + "\""); + } + } else { + throw new CalendarParserException("Day missing from \"" + + dateStr + "\""); + } + } else if (!state.isMonthSet()) { + if (!state.isYearSet()) { + throw new CalendarParserException("Year and month 
missing" + + " from \"" + dateStr + "\""); + } else { + throw new CalendarParserException("Month missing from \"" + + dateStr + "\""); + } + } else if (!state.isYearSet()) { + throw new CalendarParserException("Year missing from \"" + dateStr + + "\""); + } + + final int tmpYear = state.getYear(); + if (tmpYear < 50) { + state.setYear(tmpYear + CENTURY_OFFSET); + } else if (tmpYear < 100) { + state.setYear(tmpYear + (CENTURY_OFFSET - 100)); + } + + GregorianCalendar cal = new GregorianCalendar(); + + state.setCalendar(cal, ignoreChanges); + + if (DEBUG) { + System.err.println("Y" + state.getYear() + " M" + state.getMonth() + + " D" + state.getDate() + " H" + state.getHour() + " M" + + state.getMinute() + " S" + state.getSecond() + " L" + + state.getMillisecond() + " => " + toString(cal)); + } + + return cal; + } + + /** + * Parse a time string. + * + * @param dateStr + * full date string + * @param state + * parser state + * @param timeStr + * string containing colon-separated time + * + * @throws CalendarParserException + * if there is a problem with the time + */ + private static final void parseTime(String dateStr, ParserState state, + String timeStr) throws CalendarParserException { + int place = PLACE_HOUR; + + String tmpTime; + + final char lastChar = timeStr.charAt(timeStr.length() - 1); + if (lastChar != 'm' && lastChar != 'M') { + if (DEBUG) { + System.err.println("No AM/PM in \"" + timeStr + "\" (time)"); + } + tmpTime = timeStr; + } else { + final char preLast = timeStr.charAt(timeStr.length() - 2); + if (preLast == 'a' || preLast == 'A') { + state.setTimePostMeridian(false); + } else if (preLast == 'p' || preLast == 'P') { + state.setTimePostMeridian(true); + } else { + throw new CalendarParserException("Bad time \"" + timeStr + + "\" in date \"" + dateStr + "\""); + } + + tmpTime = timeStr.substring(0, timeStr.length() - 2); + if (DEBUG) { + System.err.println("Found " + + (state.isTimePostMeridian() ? "PM" : "AM") + + ". 
now \"" + tmpTime + "\" (time)"); + } + } + + String[] tList = tmpTime.split("[:\\.]"); + for (int i = 0; i < tList.length; i++) { + String token = tList[i]; + + if (DEBUG) { + System.err.println("HOUR " + + (state.isHourSet() ? Integer + .toString(state.getHour()) : "UNSET") + + ", MINUTE " + + (state.isMinuteSet() ? Integer.toString(state + .getMinute()) : "UNSET") + + ", SECOND " + + (state.isSecondSet() ? Integer.toString(state + .getSecond()) : "UNSET") + + ", MILLISECOND " + + (state.isMillisecondSet() ? Integer.toString(state + .getMillisecond()) : "UNSET") + ", TOKEN=\"" + + token + "\""); + } + + final int val; + try { + val = Integer.parseInt(token); + } catch (NumberFormatException nfe) { + throw new CalendarParserException("Bad " + + getTimePlaceString(place) + " string \"" + token + + "\" in \"" + dateStr + "\""); + } + + switch (place) { + case PLACE_HOUR: + try { + state.setHour(val); + } catch (CalendarParserException dfe) { + throw new CalendarParserException(dfe.getMessage() + + " in \"" + dateStr + "\""); + } + if (DEBUG) { + System.err.println("Set hour to " + val); + } + place = PLACE_MINUTE; + break; + case PLACE_MINUTE: + try { + state.setMinute(val); + } catch (CalendarParserException dfe) { + throw new CalendarParserException(dfe.getMessage() + + " in \"" + dateStr + "\""); + } + if (DEBUG) { + System.err.println("Set minute to " + val); + } + place = PLACE_SECOND; + break; + case PLACE_SECOND: + try { + state.setSecond(val); + } catch (CalendarParserException dfe) { + throw new CalendarParserException(dfe.getMessage() + + " in \"" + dateStr + "\""); + } + if (DEBUG) { + System.err.println("Set second to " + val); + } + place = PLACE_MILLI; + break; + case PLACE_MILLI: + try { + state.setMillisecond(val); + } catch (CalendarParserException dfe) { + throw new CalendarParserException(dfe.getMessage() + + " in \"" + dateStr + "\""); + } + if (DEBUG) { + System.err.println("Set millisecond to " + val); + } + place = PLACE_UNKNOWN; + break; + 
default: + throw new CalendarParserException("Unexpected place value " + + place); + } + } + } + + /** + * Parse a time zone offset string. + * + * @param dateStr + * full date string + * @param state + * parser state + * @param zoneStr + * string containing colon-separated time zone offset + * + * @throws CalendarParserException + * if there is a problem with the time + */ + private static final void parseTimeZoneOffset(String dateStr, + ParserState state, String zoneStr) throws CalendarParserException { + int place = PLACE_HOUR; + + final boolean isNegative = (zoneStr.charAt(0) == '-'); + if (!isNegative && zoneStr.charAt(0) != '+') { + throw new CalendarParserException("Bad time zone offset \"" + + zoneStr + "\" in date \"" + dateStr + "\""); + } + + int hour = UNSET; + int minute = UNSET; + + String[] tList = zoneStr.substring(1).split(":"); + for (int i = 0; i < tList.length; i++) { + String token = tList[i]; + + if (DEBUG) { + System.err + .println("TZ_HOUR " + + (hour != UNSET ? Integer.toString(hour) + : "UNSET") + + ", TZ_MINUTE " + + (minute != UNSET ? Integer.toString(minute) + : "UNSET") + ", TOKEN=\"" + token + + "\""); + } + + final int val; + try { + val = Integer.parseInt(token); + } catch (NumberFormatException nfe) { + throw new CalendarParserException("Bad time zone " + + getTimePlaceString(place) + " offset \"" + token + + "\" in \"" + dateStr + "\""); + } + + switch (place) { + case PLACE_HOUR: + hour = val; + if (DEBUG) { + System.err.println("Set time zone offset hour to " + val); + } + place = PLACE_MINUTE; + break; + case PLACE_MINUTE: + minute = val; + if (DEBUG) { + System.err.println("Set time zone offset minute to " + val); + } + place = PLACE_UNKNOWN; + break; + default: + throw new CalendarParserException("Unexpected place value " + + place); + } + } + + String customID = "GMT" + (isNegative ? "-" : "+") + hour + ":" + + (minute < 10 ? 
"0" : "") + minute; + + state.setTimeZone(TimeZone.getTimeZone(customID)); + } + + /** + * Return a printable representation of the date. + * + * @param cal + * calendar to convert to a string + * + * @return a printable string. + */ + public static final String prettyString(Calendar cal) { + if (cal == null) { + return null; + } + + final int calYear = cal.get(Calendar.YEAR); + final int calMonth = cal.get(Calendar.MONTH); + final int calDay = cal.get(Calendar.DATE); + + boolean needSpace = false; + StringBuffer buf = new StringBuffer(); + + if (calMonth >= 0 && calMonth < MONTHS.length) { + if (needSpace) { + buf.append(' '); + } + buf.append(MONTHS[calMonth][1]); + needSpace = true; + } + if (calDay > 0) { + if (needSpace) { + buf.append(' '); + } + buf.append(calDay); + if (calYear > UNSET) { + buf.append(','); + } + needSpace = true; + } + if (calYear > UNSET) { + if (needSpace) { + buf.append(' '); + } + buf.append(calYear); + } + + appendTimeString(buf, cal, needSpace); + + return buf.toString(); + } + + /** + * Return a basic representation of the string. + * + * @param cal + * calendar to convert to a string + * + * @return the basic string. 
+ */ + public static final String toString(Calendar cal) { + if (cal == null) { + return null; + } + + final int calYear = cal.get(Calendar.YEAR); + final int calMonth = cal.get(Calendar.MONTH); + final int calDay = cal.get(Calendar.DATE); + + boolean needSpace = false; + StringBuffer buf = new StringBuffer(); + + if (calDay > 0) { + if (needSpace) { + buf.append(' '); + } + buf.append(calDay); + needSpace = true; + } + if (calMonth >= 0 && calMonth < MONTHS.length) { + if (needSpace) { + buf.append(' '); + } + buf.append(MONTHS[calMonth][1].substring(0, 3)); + needSpace = true; + } + if (calYear > UNSET) { + if (needSpace) { + buf.append(' '); + } + buf.append(calYear); + } + + appendTimeString(buf, cal, needSpace); + + return buf.toString(); + } + + /** + * Return a string representation of the date suitable for use in an SQL + * statement. + * + * @param cal + * calendar to convert to a string + * + * @return the SQL-friendly string. + */ + public static final String toSQLString(Calendar cal) { + if (cal == null) { + return null; + } + + final int calYear = cal.get(Calendar.YEAR); + final int calMonth = cal.get(Calendar.MONTH); + final int calDay = cal.get(Calendar.DATE); + + StringBuffer buf = new StringBuffer(); + + buf.append(calYear); + buf.append('-'); + if ((calMonth + 1) < 10) { + buf.append('0'); + } + buf.append(calMonth + 1); + buf.append('-'); + if (calDay < 10) { + buf.append('0'); + } + buf.append(calDay); + + appendTimeString(buf, cal, true); + + return buf.toString(); + } +} diff --git a/main/src/com/google/refine/expr/util/CalendarParserException.java b/main/src/com/google/refine/expr/util/CalendarParserException.java new file mode 100644 index 000000000..8a9d7ef89 --- /dev/null +++ b/main/src/com/google/refine/expr/util/CalendarParserException.java @@ -0,0 +1,24 @@ +package com.google.refine.expr.util; + +// Taken from http://icecube.wisc.edu/~dglo/software/calparse/index.html +// Copyright Dave Glowacki. Released under the BSD license. 
/**
 * Thrown when an invalid date is encountered in CalendarParser.
 */
public class CalendarParserException extends Exception {

    private static final long serialVersionUID = 7195725880623801198L;

    /**
     * Default date format exception, with no detail message.
     */
    public CalendarParserException() { super(); }

    /**
     * Date format exception.
     *
     * @param str error message
     */
    public CalendarParserException(String str) { super(str); }

    /**
     * Date format exception that keeps the underlying failure attached, so
     * code that rethrows with extra context does not have to flatten the
     * original exception into the message text.
     *
     * @param str error message
     * @param cause the exception that triggered this one (may be null)
     */
    public CalendarParserException(String str, Throwable cause) { super(str, cause); }
}
+ */ +public interface Control extends Jsonizable { + public Object call(Properties bindings, Evaluable[] args); + + public String checkArguments(Evaluable[] args); +} diff --git a/main/src/com/google/refine/gel/ControlFunctionRegistry.java b/main/src/com/google/refine/gel/ControlFunctionRegistry.java new file mode 100644 index 000000000..b5c3f81be --- /dev/null +++ b/main/src/com/google/refine/gel/ControlFunctionRegistry.java @@ -0,0 +1,216 @@ +package com.google.refine.gel; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.Map.Entry; + +import com.google.refine.expr.functions.Cross; +import com.google.refine.expr.functions.FacetCount; +import com.google.refine.expr.functions.Get; +import com.google.refine.expr.functions.HasField; +import com.google.refine.expr.functions.Jsonize; +import com.google.refine.expr.functions.Length; +import com.google.refine.expr.functions.Slice; +import com.google.refine.expr.functions.ToDate; +import com.google.refine.expr.functions.ToNumber; +import com.google.refine.expr.functions.ToString; +import com.google.refine.expr.functions.Type; +import com.google.refine.expr.functions.arrays.Join; +import com.google.refine.expr.functions.arrays.Reverse; +import com.google.refine.expr.functions.arrays.Sort; +import com.google.refine.expr.functions.arrays.Uniques; +import com.google.refine.expr.functions.booleans.And; +import com.google.refine.expr.functions.booleans.Not; +import com.google.refine.expr.functions.booleans.Or; +import com.google.refine.expr.functions.date.DatePart; +import com.google.refine.expr.functions.date.Inc; +import com.google.refine.expr.functions.date.Now; +import com.google.refine.expr.functions.math.Ceil; +import com.google.refine.expr.functions.math.Exp; +import com.google.refine.expr.functions.math.Floor; +import com.google.refine.expr.functions.math.Ln; +import com.google.refine.expr.functions.math.Log; +import com.google.refine.expr.functions.math.Max; +import 
com.google.refine.expr.functions.math.Min; +import com.google.refine.expr.functions.math.Mod; +import com.google.refine.expr.functions.math.Pow; +import com.google.refine.expr.functions.math.Round; +import com.google.refine.expr.functions.math.Sum; +import com.google.refine.expr.functions.strings.Chomp; +import com.google.refine.expr.functions.strings.Contains; +import com.google.refine.expr.functions.strings.Diff; +import com.google.refine.expr.functions.strings.EndsWith; +import com.google.refine.expr.functions.strings.Escape; +import com.google.refine.expr.functions.strings.Fingerprint; +import com.google.refine.expr.functions.strings.IndexOf; +import com.google.refine.expr.functions.strings.LastIndexOf; +import com.google.refine.expr.functions.strings.MD5; +import com.google.refine.expr.functions.strings.Match; +import com.google.refine.expr.functions.strings.NGram; +import com.google.refine.expr.functions.strings.NGramFingerprint; +import com.google.refine.expr.functions.strings.ParseJson; +import com.google.refine.expr.functions.strings.Partition; +import com.google.refine.expr.functions.strings.Phonetic; +import com.google.refine.expr.functions.strings.RPartition; +import com.google.refine.expr.functions.strings.Reinterpret; +import com.google.refine.expr.functions.strings.Replace; +import com.google.refine.expr.functions.strings.ReplaceChars; +import com.google.refine.expr.functions.strings.SHA1; +import com.google.refine.expr.functions.strings.SmartSplit; +import com.google.refine.expr.functions.strings.Split; +import com.google.refine.expr.functions.strings.SplitByCharType; +import com.google.refine.expr.functions.strings.SplitByLengths; +import com.google.refine.expr.functions.strings.StartsWith; +import com.google.refine.expr.functions.strings.ToLowercase; +import com.google.refine.expr.functions.strings.ToTitlecase; +import com.google.refine.expr.functions.strings.ToUppercase; +import com.google.refine.expr.functions.strings.Trim; +import 
com.google.refine.expr.functions.strings.Unescape; +import com.google.refine.expr.functions.strings.Unicode; +import com.google.refine.expr.functions.strings.UnicodeType; +import com.google.refine.gel.controls.Filter; +import com.google.refine.gel.controls.ForEach; +import com.google.refine.gel.controls.ForEachIndex; +import com.google.refine.gel.controls.ForNonBlank; +import com.google.refine.gel.controls.ForRange; +import com.google.refine.gel.controls.If; +import com.google.refine.gel.controls.IsBlank; +import com.google.refine.gel.controls.IsError; +import com.google.refine.gel.controls.IsNonBlank; +import com.google.refine.gel.controls.IsNotNull; +import com.google.refine.gel.controls.IsNull; +import com.google.refine.gel.controls.IsNumeric; +import com.google.refine.gel.controls.With; + +public class ControlFunctionRegistry { + + static private Map s_nameToFunction = new HashMap(); + static private Map s_functionToName = new HashMap(); + + static private Map s_nameToControl = new HashMap(); + static private Map s_controlToName = new HashMap(); + + static public Function getFunction(String name) { + return s_nameToFunction.get(name); + } + static public String getFunctionName(Function f) { + return s_functionToName.get(f); + } + static public Set> getFunctionMapping() { + return s_nameToFunction.entrySet(); + } + + static public Control getControl(String name) { + return s_nameToControl.get(name); + } + static public String getControlName(Control f) { + return s_controlToName.get(f); + } + static public Set> getControlMapping() { + return s_nameToControl.entrySet(); + } + + static public void registerFunction(String name, Function f) { + s_nameToFunction.put(name, f); + s_functionToName.put(f, name); + } + + static public void registerControl(String name, Control c) { + s_nameToControl.put(name, c); + s_controlToName.put(c, name); + } + + static { + registerFunction("type", new Type()); + + registerFunction("toString", new ToString()); + 
registerFunction("toNumber", new ToNumber()); + registerFunction("toDate", new ToDate()); + + registerFunction("toUppercase", new ToUppercase()); + registerFunction("toLowercase", new ToLowercase()); + registerFunction("toTitlecase", new ToTitlecase()); + + registerFunction("hasField", new HasField()); + registerFunction("get", new Get()); + registerFunction("slice", new Slice()); + registerFunction("substring", new Slice()); + registerFunction("replace", new Replace()); + registerFunction("replaceChars", new ReplaceChars()); + registerFunction("split", new Split()); + registerFunction("smartSplit", new SmartSplit()); + registerFunction("splitByCharType", new SplitByCharType()); + registerFunction("splitByLengths", new SplitByLengths()); + registerFunction("partition", new Partition()); + registerFunction("rpartition", new RPartition()); + registerFunction("trim", new Trim()); + registerFunction("strip", new Trim()); + registerFunction("contains", new Contains()); + registerFunction("escape", new Escape()); + registerFunction("unescape", new Unescape()); + registerFunction("length", new Length()); + registerFunction("sha1", new SHA1()); + registerFunction("md5", new MD5()); + registerFunction("unicode", new Unicode()); + registerFunction("unicodeType", new UnicodeType()); + registerFunction("diff", new Diff()); + registerFunction("chomp", new Chomp()); + registerFunction("fingerprint", new Fingerprint()); + registerFunction("ngramFingerprint", new NGramFingerprint()); + registerFunction("phonetic", new Phonetic()); + registerFunction("reinterpret", new Reinterpret()); + registerFunction("jsonize", new Jsonize()); + registerFunction("parseJson", new ParseJson()); + registerFunction("ngram", new NGram()); + registerFunction("match", new Match()); + + registerFunction("indexOf", new IndexOf()); + registerFunction("lastIndexOf", new LastIndexOf()); + registerFunction("startsWith", new StartsWith()); + registerFunction("endsWith", new EndsWith()); + 
registerFunction("join", new Join()); + registerFunction("reverse", new Reverse()); + registerFunction("sort", new Sort()); + registerFunction("uniques", new Uniques()); + + registerFunction("now", new Now()); + registerFunction("inc", new Inc()); + registerFunction("datePart", new DatePart()); + + registerFunction("round", new Round()); + registerFunction("floor", new Floor()); + registerFunction("ceil", new Ceil()); + registerFunction("mod", new Mod()); + registerFunction("max", new Max()); + registerFunction("min", new Min()); + registerFunction("log", new Log()); + registerFunction("ln", new Ln()); + registerFunction("pow", new Pow()); + registerFunction("exp", new Exp()); + registerFunction("sum", new Sum()); + + registerFunction("and", new And()); + registerFunction("or", new Or()); + registerFunction("not", new Not()); + + registerFunction("cross", new Cross()); + + registerFunction("facetCount", new FacetCount()); + + registerControl("if", new If()); + registerControl("with", new With()); + registerControl("forEach", new ForEach()); + registerControl("forEachIndex", new ForEachIndex()); + registerControl("forRange", new ForRange()); + registerControl("filter", new Filter()); + registerControl("forNonBlank", new ForNonBlank()); + + registerControl("isNull", new IsNull()); + registerControl("isNotNull", new IsNotNull()); + registerControl("isBlank", new IsBlank()); + registerControl("isNonBlank", new IsNonBlank()); + registerControl("isNumeric", new IsNumeric()); + registerControl("isError", new IsError()); + } +} diff --git a/main/src/com/google/refine/gel/Function.java b/main/src/com/google/refine/gel/Function.java new file mode 100644 index 000000000..dd6fff7da --- /dev/null +++ b/main/src/com/google/refine/gel/Function.java @@ -0,0 +1,13 @@ +package com.google.refine.gel; + +import java.util.Properties; + +import com.google.refine.Jsonizable; + +/** + * Interface for functions. 
When a function is called, its arguments have already + * been evaluated down into non-error values. + */ +public interface Function extends Jsonizable { + public Object call(Properties bindings, Object[] args); +} diff --git a/main/src/com/google/refine/gel/Parser.java b/main/src/com/google/refine/gel/Parser.java new file mode 100644 index 000000000..adebfe737 --- /dev/null +++ b/main/src/com/google/refine/gel/Parser.java @@ -0,0 +1,291 @@ +package com.google.refine.gel; + +import java.util.LinkedList; +import java.util.List; +import java.util.regex.Pattern; + +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ParsingException; +import com.google.refine.expr.functions.arrays.ArgsToArray; +import com.google.refine.gel.Scanner.NumberToken; +import com.google.refine.gel.Scanner.RegexToken; +import com.google.refine.gel.Scanner.Token; +import com.google.refine.gel.Scanner.TokenType; +import com.google.refine.gel.ast.ControlCallExpr; +import com.google.refine.gel.ast.FieldAccessorExpr; +import com.google.refine.gel.ast.FunctionCallExpr; +import com.google.refine.gel.ast.LiteralExpr; +import com.google.refine.gel.ast.OperatorCallExpr; +import com.google.refine.gel.ast.VariableExpr; + +public class Parser { + protected Scanner _scanner; + protected Token _token; + protected Evaluable _root; + + public Parser(String s) throws ParsingException { + this(s, 0, s.length()); + } + + public Parser(String s, int from, int to) throws ParsingException { + _scanner = new Scanner(s, from, to); + _token = _scanner.next(true); + + _root = parseExpression(); + } + + public Evaluable getExpression() { + return _root; + } + + protected void next(boolean regexPossible) { + _token = _scanner.next(regexPossible); + } + + protected ParsingException makeException(String desc) { + int index = _token != null ? 
_token.start : _scanner.getIndex(); + + return new ParsingException("Parsing error at offset " + index + ": " + desc); + } + + /** + * := + * | [ "<" "<=" ">" ">=" "==" "!=" ] + */ + protected Evaluable parseExpression() throws ParsingException { + Evaluable sub = parseSubExpression(); + + while (_token != null && + _token.type == TokenType.Operator && + ">=<==!=".indexOf(_token.text) >= 0) { + + String op = _token.text; + + next(true); + + Evaluable sub2 = parseSubExpression(); + + sub = new OperatorCallExpr(new Evaluable[] { sub, sub2 }, op); + } + + return sub; + } + + /** + * := + * | [ "+" "-" ] + */ + protected Evaluable parseSubExpression() throws ParsingException { + Evaluable sub = parseTerm(); + + while (_token != null && + _token.type == TokenType.Operator && + "+-".indexOf(_token.text) >= 0) { + + String op = _token.text; + + next(true); + + Evaluable sub2 = parseSubExpression(); + + sub = new OperatorCallExpr(new Evaluable[] { sub, sub2 }, op); + } + + return sub; + } + + /** + * := + * | [ "*" "/" ] + */ + protected Evaluable parseTerm() throws ParsingException { + Evaluable factor = parseFactor(); + + while (_token != null && + _token.type == TokenType.Operator && + "*/".indexOf(_token.text) >= 0) { + + String op = _token.text; + + next(true); + + Evaluable factor2 = parseFactor(); + + factor = new OperatorCallExpr(new Evaluable[] { factor, factor2 }, op); + } + + return factor; + } + + /** + * := ( )* + * := + * | | - | | | + * ( ) + * + * := "[" "]" + * | "." + * | "." "(" ")" + * + */ + protected Evaluable parseFactor() throws ParsingException { + if (_token == null) { + throw makeException("Expecting something more at end of expression"); + } + + Evaluable eval = null; + + if (_token.type == TokenType.String) { + eval = new LiteralExpr(_token.text); + next(false); + } else if (_token.type == TokenType.Regex) { + RegexToken t = (RegexToken) _token; + + try { + Pattern pattern = Pattern.compile(_token.text, t.caseInsensitive ? 
Pattern.CASE_INSENSITIVE : 0); + eval = new LiteralExpr(pattern); + next(false); + } catch (Exception e) { + throw makeException("Bad regular expression (" + e.getMessage() + ")"); + } + } else if (_token.type == TokenType.Number) { + eval = new LiteralExpr(((NumberToken)_token).value); + next(false); + } else if (_token.type == TokenType.Operator && _token.text.equals("-")) { // unary minus? + next(true); + + if (_token != null && _token.type == TokenType.Number) { + eval = new LiteralExpr(-((NumberToken)_token).value); + next(false); + } else { + throw makeException("Bad negative number"); + } + } else if (_token.type == TokenType.Identifier) { + String text = _token.text; + next(false); + + if (_token == null || _token.type != TokenType.Delimiter || !_token.text.equals("(")) { + eval = "null".equals(text) ? new LiteralExpr(null) : new VariableExpr(text); + } else { + Function f = ControlFunctionRegistry.getFunction(text); + Control c = ControlFunctionRegistry.getControl(text); + if (f == null && c == null) { + throw makeException("Unknown function or control named " + text); + } + + next(true); // swallow ( + + List args = parseExpressionList(")"); + + if (c != null) { + Evaluable[] argsA = makeArray(args); + String errorMessage = c.checkArguments(argsA); + if (errorMessage != null) { + throw makeException(errorMessage); + } + eval = new ControlCallExpr(argsA, c); + } else { + eval = new FunctionCallExpr(makeArray(args), f); + } + } + } else if (_token.type == TokenType.Delimiter && _token.text.equals("(")) { + next(true); + + eval = parseExpression(); + + if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals(")")) { + next(false); + } else { + throw makeException("Missing )"); + } + } else if (_token.type == TokenType.Delimiter && _token.text.equals("[")) { // [ ... 
] array + next(true); // swallow [ + + List args = parseExpressionList("]"); + + eval = new FunctionCallExpr(makeArray(args), new ArgsToArray()); + } else { + throw makeException("Missing number, string, identifier, regex, or parenthesized expression"); + } + + while (_token != null) { + if (_token.type == TokenType.Operator && _token.text.equals(".")) { + next(false); // swallow . + + if (_token == null || _token.type != TokenType.Identifier) { + throw makeException("Missing function name"); + } + + String identifier = _token.text; + next(false); + + if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals("(")) { + next(true); // swallow ( + + Function f = ControlFunctionRegistry.getFunction(identifier); + if (f == null) { + throw makeException("Unknown function " + identifier); + } + + List args = parseExpressionList(")"); + args.add(0, eval); + + eval = new FunctionCallExpr(makeArray(args), f); + } else { + eval = new FieldAccessorExpr(eval, identifier); + } + } else if (_token.type == TokenType.Delimiter && _token.text.equals("[")) { + next(true); // swallow [ + + List args = parseExpressionList("]"); + args.add(0, eval); + + eval = new FunctionCallExpr(makeArray(args), ControlFunctionRegistry.getFunction("get")); + } else { + break; + } + } + + return eval; + } + + /** + * := + * | ( "," )* + * + */ + protected List parseExpressionList(String closingDelimiter) throws ParsingException { + List l = new LinkedList(); + + if (_token != null && + (_token.type != TokenType.Delimiter || !_token.text.equals(closingDelimiter))) { + + while (_token != null) { + Evaluable eval = parseExpression(); + + l.add(eval); + + if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals(",")) { + next(true); // swallow comma, loop back for more + } else { + break; + } + } + } + + if (_token != null && _token.type == TokenType.Delimiter && _token.text.equals(closingDelimiter)) { + next(false); // swallow closing delimiter + } else { + throw 
/**
 * Tokenizer for GEL expressions. It walks the character range
 * [from, to) of the input string and hands out one {@link Token} per call
 * to {@link #next(boolean)}.
 */
public class Scanner {
    static public enum TokenType {
        Error,
        Delimiter,
        Operator,
        Identifier,
        Number,
        String,
        Regex
    }

    /**
     * A token covering the source range [start, end). For string tokens
     * {@code text} is the unescaped content without the quotes; for regex
     * tokens it is the pattern between the slashes; for every other kind it
     * is the raw source slice.
     */
    static public class Token {
        final public int start;
        final public int end;
        final public TokenType type;
        final public String text;

        Token(int start, int end, TokenType type, String text) {
            this.start = start;
            this.end = end;
            this.type = type;
            this.text = text;
        }
    }

    /** A stretch of input that could not be tokenized, with the reason why. */
    static public class ErrorToken extends Token {
        final public String detail; // error detail

        public ErrorToken(int start, int end, String text, String detail) {
            super(start, end, TokenType.Error, text);
            this.detail = detail;
        }
    }

    /** A number literal together with its parsed value. */
    static public class NumberToken extends Token {
        final public double value;

        public NumberToken(int start, int end, String text, double value) {
            super(start, end, TokenType.Number, text);
            this.value = value;
        }
    }

    /** A regex literal; {@code caseInsensitive} is set by a trailing {@code i} flag. */
    static public class RegexToken extends Token {
        final public boolean caseInsensitive;

        public RegexToken(int start, int end, String text, boolean caseInsensitive) {
            super(start, end, TokenType.Regex, text);
            this.caseInsensitive = caseInsensitive;
        }
    }

    protected String _text; // input text to tokenize
    protected int _index; // index of the next character to process
    protected int _limit; // process up to this index

    public Scanner(String s) {
        this(s, 0, s.length());
    }

    public Scanner(String s, int from, int to) {
        _text = s;
        _index = from;
        _limit = to;
    }

    /** @return the index of the next character that will be examined */
    public int getIndex() {
        return _index;
    }

    /**
     * Returns the next token, or null once the input range is exhausted.
     *
     * The regexPossible flag is used by the parser to hint the scanner what to do
     * when it encounters a slash. Since the divide operator / and the opening
     * delimiter of a regex literal are the same, but divide operators and regex
     * literals can't occur at the same place in an expression, this flag is a cheap
     * way to distinguish the two without having to look ahead.
     *
     * @param regexPossible whether a slash at this position starts a regex literal
     * @return the next token, an {@link ErrorToken} for input that cannot be
     *         tokenized, or null at the end of the input
     */
    public Token next(boolean regexPossible) {
        // skip whitespace
        while (_index < _limit && Character.isWhitespace(_text.charAt(_index))) {
            _index++;
        }
        if (_index >= _limit) {
            return null;
        }

        final int start = _index;
        final char c = _text.charAt(_index);
        final String detail;

        if (Character.isDigit(c)) {
            return scanNumber(start);
        } else if (c == '"' || c == '\'') {
            Token string = scanString(start, c);
            if (string != null) {
                return string;
            }
            detail = "String not properly closed";
        } else if (Character.isLetter(c) || c == '_') {
            return scanIdentifier(start);
        } else if (c == '/' && regexPossible) {
            Token regex = scanRegex(start);
            if (regex != null) {
                return regex;
            }
            detail = "Regex not properly closed";
        } else if ("+-*/.".indexOf(c) >= 0) {
            return take(start, 1, TokenType.Operator);
        } else if ("()[],".indexOf(c) >= 0) {
            return take(start, 1, TokenType.Delimiter);
        } else if (c == '!' && isFollowedBy('=')) {
            return take(start, 2, TokenType.Operator);
        } else if (c == '<') {
            // "<=", "<>" or plain "<"
            boolean twoChars = isFollowedBy('=') || isFollowedBy('>');
            return take(start, twoChars ? 2 : 1, TokenType.Operator);
        } else if (c == '>' || c == '=') {
            // ">=", "==" or the single character
            return take(start, isFollowedBy('=') ? 2 : 1, TokenType.Operator);
        } else {
            _index++;
            detail = "Unrecognized symbol";
        }

        return new ErrorToken(
            start,
            _index,
            _text.substring(start, _index),
            detail
        );
    }

    /** True when the character right after the current one exists and equals {@code expected}. */
    private boolean isFollowedBy(char expected) {
        return _index + 1 < _limit && _text.charAt(_index + 1) == expected;
    }

    /** Consumes {@code length} characters from {@code start} as a token whose text is the raw slice. */
    private Token take(int start, int length, TokenType type) {
        _index = start + length;
        return new Token(start, _index, type, _text.substring(start, _index));
    }

    /** Scans digits with an optional fractional part; a trailing "." is consumed too. */
    private Token scanNumber(int start) {
        double value = 0;

        while (_index < _limit && Character.isDigit(_text.charAt(_index))) {
            // Character.digit gives the right value for every character that
            // isDigit accepts; "c - '0'" is only correct for ASCII digits.
            value = value * 10 + Character.digit(_text.charAt(_index), 10);
            _index++;
        }

        if (_index < _limit && _text.charAt(_index) == '.') {
            _index++;

            double division = 1;
            while (_index < _limit && Character.isDigit(_text.charAt(_index))) {
                value = value * 10 + Character.digit(_text.charAt(_index), 10);
                division *= 10;
                _index++;
            }

            value /= division;
        }

        // TODO: support exponent e notation

        return new NumberToken(
            start,
            _index,
            _text.substring(start, _index),
            value
        );
    }

    /**
     * Scans a quoted string, resolving backslash escapes.
     *
     * @return the string token, or null if the input ends before the closing
     *         delimiter (the index is then left at the limit)
     */
    private Token scanString(int start, char delimiter) {
        StringBuilder sb = new StringBuilder();

        _index++; // skip opening delimiter

        while (_index < _limit) {
            char c = _text.charAt(_index);
            if (c == delimiter) {
                _index++; // skip closing delimiter
                return new Token(start, _index, TokenType.String, sb.toString());
            }

            if (c == '\\') {
                _index++; // skip escaping marker
                if (_index >= _limit) {
                    // Dangling backslash at the very end: stop here so the
                    // index never moves past the limit.
                    break;
                }

                char escaped = _text.charAt(_index);
                switch (escaped) {
                    case 't':
                        sb.append('\t');
                        break;
                    case 'n':
                        sb.append('\n');
                        break;
                    case 'r':
                        sb.append('\r');
                        break;
                    default:
                        // backslash, quotes and anything else stand for themselves
                        sb.append(escaped);
                        break;
                }
            } else {
                sb.append(c);
            }
            _index++;
        }

        return null;
    }

    /** Scans a run of letters, digits and underscores. */
    private Token scanIdentifier(int start) {
        while (_index < _limit) {
            char c = _text.charAt(_index);
            if (c != '_' && !Character.isLetterOrDigit(c)) {
                break;
            }
            _index++;
        }

        return new Token(
            start,
            _index,
            TokenType.Identifier,
            _text.substring(start, _index)
        );
    }

    /**
     * Scans a /pattern/ literal with an optional trailing "i" flag. Escape
     * sequences are copied into the pattern unchanged, backslash included.
     *
     * @return the regex token, or null if the input ends before the closing
     *         slash (the index is then left at the limit)
     */
    private Token scanRegex(int start) {
        StringBuilder sb = new StringBuilder();

        _index++; // skip opening delimiter

        while (_index < _limit) {
            char c = _text.charAt(_index);
            if (c == '/') {
                _index++; // skip closing delimiter

                boolean caseInsensitive = false;
                if (_index < _limit && _text.charAt(_index) == 'i') {
                    caseInsensitive = true;
                    _index++;
                }

                return new RegexToken(start, _index, sb.toString(), caseInsensitive);
            }

            sb.append(c);
            if (c == '\\') {
                _index++; // skip escaping marker
                if (_index >= _limit) {
                    break; // dangling backslash, same reasoning as in scanString
                }
                sb.append(_text.charAt(_index));
            }
            _index++;
        }

        return null;
    }
}
+ */ +public class ControlCallExpr implements Evaluable { + final protected Evaluable[] _args; + final protected Control _control; + + public ControlCallExpr(Evaluable[] args, Control c) { + _args = args; + _control = c; + } + + public Object evaluate(Properties bindings) { + return _control.call(bindings, _args); + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + + for (Evaluable ev : _args) { + if (sb.length() > 0) { + sb.append(", "); + } + sb.append(ev.toString()); + } + + return _control.getClass().getSimpleName() + "(" + sb.toString() + ")"; + } +} diff --git a/main/src/com/google/refine/gel/ast/FieldAccessorExpr.java b/main/src/com/google/refine/gel/ast/FieldAccessorExpr.java new file mode 100644 index 000000000..79ddfeab6 --- /dev/null +++ b/main/src/com/google/refine/gel/ast/FieldAccessorExpr.java @@ -0,0 +1,50 @@ +package com.google.refine.gel.ast; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.expr.HasFields; + +/** + * An abstract syntax tree node encapsulating a field accessor, + * e.g., "cell.value" is accessing the field named "value" on the + * variable called "cell". 
+ */ +public class FieldAccessorExpr implements Evaluable { + final protected Evaluable _inner; + final protected String _fieldName; + + public FieldAccessorExpr(Evaluable inner, String fieldName) { + _inner = inner; + _fieldName = fieldName; + } + + public Object evaluate(Properties bindings) { + Object o = _inner.evaluate(bindings); + if (ExpressionUtils.isError(o)) { + return o; // bubble the error up + } else if (o == null) { + return new EvalError("Cannot retrieve field from null"); + } else if (o instanceof HasFields) { + return ((HasFields) o).getField(_fieldName, bindings); + } else if (o instanceof JSONObject) { + try { + return ((JSONObject) o).get(_fieldName); + } catch (JSONException e) { + return new EvalError("Object does not have any field, including " + _fieldName); + } + } else { + return new EvalError("Object does not have any field, including " + _fieldName); + } + } + + @Override + public String toString() { + return _inner.toString() + "." + _fieldName; + } +} diff --git a/main/src/com/google/refine/gel/ast/FunctionCallExpr.java b/main/src/com/google/refine/gel/ast/FunctionCallExpr.java new file mode 100644 index 000000000..2f4294e7e --- /dev/null +++ b/main/src/com/google/refine/gel/ast/FunctionCallExpr.java @@ -0,0 +1,48 @@ +package com.google.refine.gel.ast; + +import java.util.Properties; + +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.gel.Function; + +/** + * An abstract syntax tree node encapsulating a function call. The function's + * arguments are all evaluated down to values before the function is applied. + * If any argument is an error, the function is not applied, and the error is + * the result of the expression. 
+ */ +public class FunctionCallExpr implements Evaluable { + final protected Evaluable[] _args; + final protected Function _function; + + public FunctionCallExpr(Evaluable[] args, Function f) { + _args = args; + _function = f; + } + + public Object evaluate(Properties bindings) { + Object[] args = new Object[_args.length]; + for (int i = 0; i < _args.length; i++) { + Object v = _args[i].evaluate(bindings); + if (ExpressionUtils.isError(v)) { + return v; // bubble up the error + } + args[i] = v; + } + return _function.call(bindings, args); + } + + public String toString() { + StringBuffer sb = new StringBuffer(); + + for (Evaluable ev : _args) { + if (sb.length() > 0) { + sb.append(", "); + } + sb.append(ev.toString()); + } + + return _function.getClass().getSimpleName() + "(" + sb.toString() + ")"; + } +} diff --git a/main/src/com/google/refine/gel/ast/LiteralExpr.java b/main/src/com/google/refine/gel/ast/LiteralExpr.java new file mode 100644 index 000000000..982545aae --- /dev/null +++ b/main/src/com/google/refine/gel/ast/LiteralExpr.java @@ -0,0 +1,26 @@ +package com.google.refine.gel.ast; + +import java.util.Properties; + +import org.json.JSONObject; + +import com.google.refine.expr.Evaluable; + +/** + * An abstract syntax tree node encapsulating a literal value. + */ +public class LiteralExpr implements Evaluable { + final protected Object _value; + + public LiteralExpr(Object value) { + _value = value; + } + + public Object evaluate(Properties bindings) { + return _value; + } + + public String toString() { + return _value instanceof String ? 
JSONObject.quote((String) _value) : _value.toString(); + } +} diff --git a/main/src/com/google/refine/gel/ast/OperatorCallExpr.java b/main/src/com/google/refine/gel/ast/OperatorCallExpr.java new file mode 100644 index 000000000..e28183e70 --- /dev/null +++ b/main/src/com/google/refine/gel/ast/OperatorCallExpr.java @@ -0,0 +1,89 @@ +package com.google.refine.gel.ast; + +import java.util.Properties; + +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; + +/** + * An abstract syntax tree node encapsulating an operator call, such as "+". + */ +public class OperatorCallExpr implements Evaluable { + final protected Evaluable[] _args; + final protected String _op; + + public OperatorCallExpr(Evaluable[] args, String op) { + _args = args; + _op = op; + } + + public Object evaluate(Properties bindings) { + Object[] args = new Object[_args.length]; + for (int i = 0; i < _args.length; i++) { + Object v = _args[i].evaluate(bindings); + if (ExpressionUtils.isError(v)) { + return v; + } + args[i] = v; + } + + if (args.length == 2) { + if (args[0] != null && args[1] != null) { + if (args[0] instanceof Number && args[1] instanceof Number) { + if ("+".equals(_op)) { + return ((Number) args[0]).doubleValue() + ((Number) args[1]).doubleValue(); + } else if ("-".equals(_op)) { + return ((Number) args[0]).doubleValue() - ((Number) args[1]).doubleValue(); + } else if ("*".equals(_op)) { + return ((Number) args[0]).doubleValue() * ((Number) args[1]).doubleValue(); + } else if ("/".equals(_op)) { + return ((Number) args[0]).doubleValue() / ((Number) args[1]).doubleValue(); + } else if (">".equals(_op)) { + return ((Number) args[0]).doubleValue() > ((Number) args[1]).doubleValue(); + } else if (">=".equals(_op)) { + return ((Number) args[0]).doubleValue() >= ((Number) args[1]).doubleValue(); + } else if ("<".equals(_op)) { + return ((Number) args[0]).doubleValue() < ((Number) args[1]).doubleValue(); + } else if ("<=".equals(_op)) { + return ((Number) 
args[0]).doubleValue() <= ((Number) args[1]).doubleValue(); + } + } + + if ("+".equals(_op)) { + return args[0].toString() + args[1].toString(); + } + } + + if ("==".equals(_op)) { + if (args[0] != null) { + return args[0].equals(args[1]); + } else { + return args[1] == null; + } + } else if ("!=".equals(_op)) { + if (args[0] != null) { + return !args[0].equals(args[1]); + } else { + return args[1] != null; + } + } + } + return null; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + + for (Evaluable ev : _args) { + if (sb.length() > 0) { + sb.append(' '); + sb.append(_op); + sb.append(' '); + } + sb.append(ev.toString()); + } + + return sb.toString(); + } +} diff --git a/main/src/com/google/refine/gel/ast/VariableExpr.java b/main/src/com/google/refine/gel/ast/VariableExpr.java new file mode 100644 index 000000000..4b7844ae3 --- /dev/null +++ b/main/src/com/google/refine/gel/ast/VariableExpr.java @@ -0,0 +1,28 @@ +package com.google.refine.gel.ast; + +import java.util.Properties; + +import com.google.refine.expr.Evaluable; + +/** + * An abstract syntax tree node encapsulating the retrieval of a variable's content. 
+ */ +public class VariableExpr implements Evaluable { + final protected String _name; + + public VariableExpr(String name) { + _name = name; + } + + public Object evaluate(Properties bindings) { + return bindings.get(_name); + } + + public String toString() { + return _name; + } + + public String getName() { + return _name; + } +} diff --git a/main/src/com/google/refine/gel/controls/Filter.java b/main/src/com/google/refine/gel/controls/Filter.java new file mode 100644 index 000000000..5174a13ec --- /dev/null +++ b/main/src/com/google/refine/gel/controls/Filter.java @@ -0,0 +1,114 @@ +package com.google.refine.gel.controls; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.gel.Control; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.ast.VariableExpr; + +public class Filter implements Control { + public String checkArguments(Evaluable[] args) { + if (args.length != 3) { + return ControlFunctionRegistry.getControlName(this) + " expects 3 arguments"; + } else if (!(args[1] instanceof VariableExpr)) { + return ControlFunctionRegistry.getControlName(this) + + " expects second argument to be a variable name"; + } + return null; + } + + public Object call(Properties bindings, Evaluable[] args) { + Object o = args[0].evaluate(bindings); + if (ExpressionUtils.isError(o)) { + return o; + } else if (!ExpressionUtils.isArrayOrCollection(o) && !(o instanceof JSONArray)) { + return new EvalError("First argument is not an array"); + } + + String name = ((VariableExpr) args[1]).getName(); + + Object oldValue = bindings.get(name); + try { + List results = null; + + if (o.getClass().isArray()) { + Object[] values = (Object[]) o; + + 
results = new ArrayList(values.length); + for (Object v : values) { + bindings.put(name, v); + + Object r = args[2].evaluate(bindings); + if (r instanceof Boolean && ((Boolean) r).booleanValue()) { + results.add(v); + } + } + } else if (o instanceof JSONArray) { + JSONArray a = (JSONArray) o; + int l = a.length(); + + results = new ArrayList(l); + for (int i = 0; i < l; i++) { + try { + Object v = a.get(i); + + bindings.put(name, v); + + Object r = args[2].evaluate(bindings); + if (r instanceof Boolean && ((Boolean) r).booleanValue()) { + results.add(v); + } + } catch (JSONException e) { + results.add(new EvalError(e.getMessage())); + } + } + } else { + Collection collection = ExpressionUtils.toObjectCollection(o); + + results = new ArrayList(collection.size()); + + for (Object v : collection) { + bindings.put(name, v); + + Object r = args[2].evaluate(bindings); + if (r instanceof Boolean && ((Boolean) r).booleanValue()) { + results.add(v); + } + } + } + + return results.toArray(); + } finally { + /* + * Restore the old value bound to the variable, if any. + */ + if (oldValue != null) { + bindings.put(name, oldValue); + } else { + bindings.remove(name); + } + } + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value( + "Evaluates expression a to an array. Then for each array element, binds its value to variable name v, evaluates expression test which should return a boolean. If the boolean is true, pushes v onto the result array." 
+ ); + writer.key("params"); writer.value("expression a, variable v, expression test"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/gel/controls/ForEach.java b/main/src/com/google/refine/gel/controls/ForEach.java new file mode 100644 index 000000000..7623f0b99 --- /dev/null +++ b/main/src/com/google/refine/gel/controls/ForEach.java @@ -0,0 +1,111 @@ +package com.google.refine.gel.controls; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.gel.Control; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.ast.VariableExpr; + +public class ForEach implements Control { + public String checkArguments(Evaluable[] args) { + if (args.length != 3) { + return ControlFunctionRegistry.getControlName(this) + " expects 3 arguments"; + } else if (!(args[1] instanceof VariableExpr)) { + return ControlFunctionRegistry.getControlName(this) + + " expects second argument to be a variable name"; + } + return null; + } + + public Object call(Properties bindings, Evaluable[] args) { + Object o = args[0].evaluate(bindings); + if (ExpressionUtils.isError(o)) { + return o; + } else if (!ExpressionUtils.isArrayOrCollection(o) && !(o instanceof JSONArray)) { + return new EvalError("First argument to forEach is not an array"); + } + + String name = ((VariableExpr) args[1]).getName(); + + Object oldValue = bindings.get(name); + try { + List results = null; + + if (o.getClass().isArray()) { + Object[] values = (Object[]) o; + + results = new ArrayList(values.length); + for (Object v : values) { + bindings.put(name, v); + + Object r = args[2].evaluate(bindings); + + results.add(r); + 
} + } else if (o instanceof JSONArray) { + JSONArray a = (JSONArray) o; + int l = a.length(); + + results = new ArrayList(l); + for (int i = 0; i < l; i++) { + try { + Object v = a.get(i); + + bindings.put(name, v); + + Object r = args[2].evaluate(bindings); + + results.add(r); + } catch (JSONException e) { + results.add(new EvalError(e.getMessage())); + } + } + } else { + Collection collection = ExpressionUtils.toObjectCollection(o); + + results = new ArrayList(collection.size()); + + for (Object v : collection) { + bindings.put(name, v); + + Object r = args[2].evaluate(bindings); + + results.add(r); + } + } + + return results.toArray(); + } finally { + /* + * Restore the old value bound to the variable, if any. + */ + if (oldValue != null) { + bindings.put(name, oldValue); + } else { + bindings.remove(name); + } + } + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value( + "Evaluates expression a to an array. Then for each array element, binds its value to variable name v, evaluates expression e, and pushes the result onto the result array." 
+ ); + writer.key("params"); writer.value("expression a, variable v, expression e"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/gel/controls/ForEachIndex.java b/main/src/com/google/refine/gel/controls/ForEachIndex.java new file mode 100644 index 000000000..08a6e11b7 --- /dev/null +++ b/main/src/com/google/refine/gel/controls/ForEachIndex.java @@ -0,0 +1,128 @@ +package com.google.refine.gel.controls; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.gel.Control; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.ast.VariableExpr; + +public class ForEachIndex implements Control { + public String checkArguments(Evaluable[] args) { + if (args.length != 4) { + return ControlFunctionRegistry.getControlName(this) + " expects 4 arguments"; + } else if (!(args[1] instanceof VariableExpr)) { + return ControlFunctionRegistry.getControlName(this) + + " expects second argument to be the index's variable name"; + } else if (!(args[2] instanceof VariableExpr)) { + return ControlFunctionRegistry.getControlName(this) + + " expects third argument to be the element's variable name"; + } + return null; + } + + public Object call(Properties bindings, Evaluable[] args) { + Object o = args[0].evaluate(bindings); + if (ExpressionUtils.isError(o)) { + return o; + } else if (!ExpressionUtils.isArrayOrCollection(o) && !(o instanceof JSONArray)) { + return new EvalError("First argument to forEach is not an array"); + } + + String indexName = ((VariableExpr) args[1]).getName(); + String elementName = ((VariableExpr) args[2]).getName(); + + Object oldIndexValue = bindings.get(indexName); + Object 
oldElementValue = bindings.get(elementName); + try { + List results = null; + + if (o.getClass().isArray()) { + Object[] values = (Object[]) o; + + results = new ArrayList(values.length); + + for (int i = 0; i < values.length; i++) { + Object v = values[i]; + + bindings.put(indexName, i); + bindings.put(elementName, v); + + Object r = args[3].evaluate(bindings); + + results.add(r); + } + } else if (o instanceof JSONArray) { + JSONArray a = (JSONArray) o; + int l = a.length(); + + results = new ArrayList(l); + for (int i = 0; i < l; i++) { + try { + Object v = a.get(i); + + bindings.put(indexName, i); + bindings.put(elementName, v); + + Object r = args[3].evaluate(bindings); + + results.add(r); + } catch (JSONException e) { + results.add(new EvalError(e.getMessage())); + } + } + } else { + List list = ExpressionUtils.toObjectList(o); + + results = new ArrayList(list.size()); + + for (int i = 0; i < list.size(); i++) { + Object v = list.get(i); + + bindings.put(indexName, i); + bindings.put(elementName, v); + + Object r = args[3].evaluate(bindings); + + results.add(r); + } + } + + return results.toArray(); + } finally { + /* + * Restore the old values bound to the variables, if any. + */ + if (oldIndexValue != null) { + bindings.put(indexName, oldIndexValue); + } else { + bindings.remove(indexName); + } + if (oldElementValue != null) { + bindings.put(elementName, oldElementValue); + } else { + bindings.remove(elementName); + } + } + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value( + "Evaluates expression a to an array. Then for each array element, binds its index to variable i and its value to variable name v, evaluates expression e, and pushes the result onto the result array." 
+ ); + writer.key("params"); writer.value("expression a, variable i, variable v, expression e"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/gel/controls/ForNonBlank.java b/main/src/com/google/refine/gel/controls/ForNonBlank.java new file mode 100644 index 000000000..388f7f109 --- /dev/null +++ b/main/src/com/google/refine/gel/controls/ForNonBlank.java @@ -0,0 +1,64 @@ +package com.google.refine.gel.controls; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.gel.Control; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.ast.VariableExpr; + +public class ForNonBlank implements Control { + public String checkArguments(Evaluable[] args) { + if (args.length != 4) { + return ControlFunctionRegistry.getControlName(this) + " expects 4 arguments"; + } else if (!(args[1] instanceof VariableExpr)) { + return ControlFunctionRegistry.getControlName(this) + + " expects second argument to be a variable name"; + } + return null; + } + + public Object call(Properties bindings, Evaluable[] args) { + Object o = args[0].evaluate(bindings); + + Evaluable var = args[1]; + String name = ((VariableExpr) var).getName(); + + if (ExpressionUtils.isNonBlankData(o)) { + Object oldValue = bindings.get(name); + bindings.put(name, o); + + try { + return args[2].evaluate(bindings); + } finally { + /* + * Restore the old value bound to the variable, if any. + */ + if (oldValue != null) { + bindings.put(name, oldValue); + } else { + bindings.remove(name); + } + } + } else { + return args[3].evaluate(bindings); + } + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value( + "Evaluates expression o. 
If it is non-blank, binds its value to variable name v, evaluates expression eNonBlank and returns the result. " + + "Otherwise (if o evaluates to blank), evaluates expression eBlank and returns that result instead." + ); + writer.key("params"); writer.value("expression o, variable v, expression eNonBlank, expression eBlank"); + writer.key("returns"); writer.value("Depends on actual arguments"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/gel/controls/ForRange.java b/main/src/com/google/refine/gel/controls/ForRange.java new file mode 100644 index 000000000..7169a855d --- /dev/null +++ b/main/src/com/google/refine/gel/controls/ForRange.java @@ -0,0 +1,110 @@ +package com.google.refine.gel.controls; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.gel.Control; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.ast.VariableExpr; + +public class ForRange implements Control { + public String checkArguments(Evaluable[] args) { + if (args.length != 5) { + return ControlFunctionRegistry.getControlName(this) + " expects 5 arguments"; + } else if (!(args[3] instanceof VariableExpr)) { + return ControlFunctionRegistry.getControlName(this) + + " expects third argument to be the element's variable name"; + } + return null; + } + + public Object call(Properties bindings, Evaluable[] args) { + Object fromO = args[0].evaluate(bindings); + Object toO = args[1].evaluate(bindings); + Object stepO = args[2].evaluate(bindings); + + if (ExpressionUtils.isError(fromO)) { + return fromO; + } else if (ExpressionUtils.isError(toO)) { + return toO; + } else if (ExpressionUtils.isError(stepO)) { + return stepO; + } else if (!(fromO instanceof Number) || !(toO instanceof Number) || 
!(stepO instanceof Number)) { + return new EvalError("First, second, and third arguments of forRange must all be numbers"); + } + + String indexName = ((VariableExpr) args[3]).getName(); + Object oldIndexValue = bindings.get(indexName); + + try { + List results = new ArrayList(); + + if (isIntegral((Number) fromO) && isIntegral((Number) stepO)) { + long from = ((Number) fromO).longValue(); + long step = ((Number) stepO).longValue(); + double to = ((Number) toO).doubleValue(); + + while (from < to) { + bindings.put(indexName, from); + + Object r = args[4].evaluate(bindings); + + results.add(r); + + from += step; + } + } else { + double from = ((Number) fromO).longValue(); + double step = ((Number) stepO).longValue(); + double to = ((Number) toO).doubleValue(); + + while (from < to) { + bindings.put(indexName, from); + + Object r = args[4].evaluate(bindings); + + results.add(r); + + from += step; + } + } + return results.toArray(); + } finally { + /* + * Restore the old values bound to the variables, if any. + */ + if (oldIndexValue != null) { + bindings.put(indexName, oldIndexValue); + } else { + bindings.remove(indexName); + } + } + } + + static private boolean isIntegral(Number o) { + if (o instanceof Integer || o instanceof Long) { + return true; + } else { + return (o.doubleValue() - o.longValue()) == 0; + } + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value( + "Iterates over the variable v starting at \"from\", incrementing by \"step\" each time while less than \"to\". At each iteration, evaluates expression e, and pushes the result onto the result array." 
+ ); + writer.key("params"); writer.value("number from, number to, number step, variable v, expression e"); + writer.key("returns"); writer.value("array"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/gel/controls/If.java b/main/src/com/google/refine/gel/controls/If.java new file mode 100644 index 000000000..c01581daa --- /dev/null +++ b/main/src/com/google/refine/gel/controls/If.java @@ -0,0 +1,44 @@ +package com.google.refine.gel.controls; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.gel.Control; +import com.google.refine.gel.ControlFunctionRegistry; + +public class If implements Control { + public String checkArguments(Evaluable[] args) { + if (args.length != 3) { + return ControlFunctionRegistry.getControlName(this) + " expects 3 arguments"; + } + return null; + } + + public Object call(Properties bindings, Evaluable[] args) { + Object o = args[0].evaluate(bindings); + if (ExpressionUtils.isError(o)) { + return o; // bubble the error up + } else if (ExpressionUtils.isTrue(o)) { + return args[1].evaluate(bindings); + } else { + return args[2].evaluate(bindings); + } + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value( + "Evaluates expression o. If it is true, evaluates expression eTrue and returns the result. " + + "Otherwise, evaluates expression eFalse and returns that result instead." 
+ ); + writer.key("params"); writer.value("expression o, expression eTrue, expression eFalse"); + writer.key("returns"); writer.value("Depends on actual arguments"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/gel/controls/IsBlank.java b/main/src/com/google/refine/gel/controls/IsBlank.java new file mode 100644 index 000000000..6d9a6bb5a --- /dev/null +++ b/main/src/com/google/refine/gel/controls/IsBlank.java @@ -0,0 +1,15 @@ +package com.google.refine.gel.controls; + +import com.google.refine.expr.ExpressionUtils; + +public class IsBlank extends IsTest { + @Override + protected String getDescription() { + return "Returns whether o is null or an empty string"; + } + + @Override + protected boolean test(Object o) { + return !ExpressionUtils.isNonBlankData(o); + } +} diff --git a/main/src/com/google/refine/gel/controls/IsError.java b/main/src/com/google/refine/gel/controls/IsError.java new file mode 100644 index 000000000..c60643c4b --- /dev/null +++ b/main/src/com/google/refine/gel/controls/IsError.java @@ -0,0 +1,15 @@ +package com.google.refine.gel.controls; + +import com.google.refine.expr.ExpressionUtils; + +public class IsError extends IsTest { + @Override + protected String getDescription() { + return "Returns whether o is an error"; + } + + @Override + protected boolean test(Object o) { + return ExpressionUtils.isError(o); + } +} diff --git a/main/src/com/google/refine/gel/controls/IsNonBlank.java b/main/src/com/google/refine/gel/controls/IsNonBlank.java new file mode 100644 index 000000000..f25208e65 --- /dev/null +++ b/main/src/com/google/refine/gel/controls/IsNonBlank.java @@ -0,0 +1,15 @@ +package com.google.refine.gel.controls; + +import com.google.refine.expr.ExpressionUtils; + +public class IsNonBlank extends IsTest { + @Override + protected String getDescription() { + return "Returns whether o is not null and not an empty string"; + } + + @Override + protected boolean test(Object o) { + return ExpressionUtils.isNonBlankData(o); 
+ } +} diff --git a/main/src/com/google/refine/gel/controls/IsNotNull.java b/main/src/com/google/refine/gel/controls/IsNotNull.java new file mode 100644 index 000000000..5064a2f2e --- /dev/null +++ b/main/src/com/google/refine/gel/controls/IsNotNull.java @@ -0,0 +1,13 @@ +package com.google.refine.gel.controls; + +public class IsNotNull extends IsTest { + @Override + protected String getDescription() { + return "Returns whether o is not null"; + } + + @Override + protected boolean test(Object o) { + return o != null; + } +} diff --git a/main/src/com/google/refine/gel/controls/IsNull.java b/main/src/com/google/refine/gel/controls/IsNull.java new file mode 100644 index 000000000..b6cb32e62 --- /dev/null +++ b/main/src/com/google/refine/gel/controls/IsNull.java @@ -0,0 +1,13 @@ +package com.google.refine.gel.controls; + +public class IsNull extends IsTest { + @Override + protected String getDescription() { + return "Returns whether o is null"; + } + + @Override + protected boolean test(Object o) { + return o == null; + } +} diff --git a/main/src/com/google/refine/gel/controls/IsNumeric.java b/main/src/com/google/refine/gel/controls/IsNumeric.java new file mode 100644 index 000000000..0791ae1c2 --- /dev/null +++ b/main/src/com/google/refine/gel/controls/IsNumeric.java @@ -0,0 +1,19 @@ +package com.google.refine.gel.controls; + +import org.apache.commons.lang.StringUtils; + +public class IsNumeric extends IsTest { + @Override + protected String getDescription() { + return "Returns whether o can represent a number"; + } + + @Override + protected boolean test(Object o) { + if (o instanceof Number) return true; + + String s = (o instanceof String) ? 
(String) o : o.toString(); + + return StringUtils.isNumeric(s); + } +} diff --git a/main/src/com/google/refine/gel/controls/IsTest.java b/main/src/com/google/refine/gel/controls/IsTest.java new file mode 100644 index 000000000..e0c37fea7 --- /dev/null +++ b/main/src/com/google/refine/gel/controls/IsTest.java @@ -0,0 +1,39 @@ +package com.google.refine.gel.controls; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.Evaluable; +import com.google.refine.gel.Control; +import com.google.refine.gel.ControlFunctionRegistry; + +abstract class IsTest implements Control { + public String checkArguments(Evaluable[] args) { + if (args.length != 1) { + return ControlFunctionRegistry.getControlName(this) + " expects one argument"; + } + return null; + } + + public Object call(Properties bindings, Evaluable[] args) { + Object o = args[0].evaluate(bindings); + + return test(o); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value(getDescription()); + writer.key("params"); writer.value("expression o"); + writer.key("returns"); writer.value("boolean"); + writer.endObject(); + } + + abstract protected boolean test(Object v); + + abstract protected String getDescription(); +} diff --git a/main/src/com/google/refine/gel/controls/With.java b/main/src/com/google/refine/gel/controls/With.java new file mode 100644 index 000000000..1c38665cb --- /dev/null +++ b/main/src/com/google/refine/gel/controls/With.java @@ -0,0 +1,60 @@ +package com.google.refine.gel.controls; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.expr.Evaluable; +import com.google.refine.gel.Control; +import com.google.refine.gel.ControlFunctionRegistry; +import com.google.refine.gel.ast.VariableExpr; + +public class With implements Control { + public String 
checkArguments(Evaluable[] args) { + if (args.length != 3) { + return ControlFunctionRegistry.getControlName(this) + " expects 3 arguments"; + } else if (!(args[1] instanceof VariableExpr)) { + return ControlFunctionRegistry.getControlName(this) + + " expects second argument to be a variable name"; + } + return null; + } + + public Object call(Properties bindings, Evaluable[] args) { + Object o = args[0].evaluate(bindings); + String name = ((VariableExpr) args[1]).getName(); + + Object oldValue = bindings.get(name); + try { + if (o != null) { + bindings.put(name, o); + } else { + bindings.remove(name); + } + + return args[2].evaluate(bindings); + } finally { + /* + * Restore the old value bound to the variable, if any. + */ + if (oldValue != null) { + bindings.put(name, oldValue); + } else { + bindings.remove(name); + } + } + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("description"); writer.value( + "Evaluates expression o and binds its value to variable name v. Then evaluates expression e and returns that result" + ); + writer.key("params"); writer.value("expression o, variable v, expression e"); + writer.key("returns"); writer.value("Depends on actual arguments"); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/history/Change.java b/main/src/com/google/refine/history/Change.java new file mode 100644 index 000000000..e8b3bc1c5 --- /dev/null +++ b/main/src/com/google/refine/history/Change.java @@ -0,0 +1,21 @@ +package com.google.refine.history; + +import java.io.IOException; +import java.io.Writer; +import java.util.Properties; + +import com.google.refine.model.Project; + +/** + * Interface for a concrete change to a project's data. A change should consist + * of new values already computed. When apply() is called, the change should not + * spend any more time computing anything. It should simply save existing values + * and swap in new values. 
Similarly, when revert() is called, the change
+ * should only swap old values back in.
+ */
+public interface Change {
+    public void apply(Project project);
+    public void revert(Project project);
+
+    public void save(Writer writer, Properties options) throws IOException;
+}
diff --git a/main/src/com/google/refine/history/ChangeSequence.java b/main/src/com/google/refine/history/ChangeSequence.java
new file mode 100644
index 000000000..313cf9d1c
--- /dev/null
+++ b/main/src/com/google/refine/history/ChangeSequence.java
@@ -0,0 +1,86 @@
+package com.google.refine.history;
+
+import java.io.IOException;
+import java.io.LineNumberReader;
+import java.io.Writer;
+import java.util.Properties;
+
+import com.google.refine.model.Project;
+import com.google.refine.util.Pool;
+
+/**
+ * A composite change: applies a fixed array of changes in order and reverts
+ * them in the opposite order.
+ */
+public class ChangeSequence implements Change {
+    final protected Change[] _changes;
+
+    public ChangeSequence(Change[] changes) {
+        _changes = changes;
+    }
+
+    /**
+     * Applies every contained change, first to last, while holding the
+     * project's monitor.
+     */
+    public void apply(Project project) {
+        synchronized (project) {
+            for (int i = 0; i < _changes.length; i++) {
+                _changes[i].apply(project);
+            }
+        }
+    }
+
+    /**
+     * Reverts every contained change, last to first, while holding the
+     * project's monitor.
+     */
+    public void revert(Project project) {
+        synchronized (project) {
+            for (int i = _changes.length - 1; i >= 0 ; i--) {
+                // Must be revert(): calling apply() here re-applied the
+                // changes backwards instead of undoing them.
+                _changes[i].revert(project);
+            }
+        }
+    }
+
+    /**
+     * Writes a "count=N" line, then each change through
+     * History.writeOneChange(), then the "/ec/" end marker line.
+     */
+    public void save(Writer writer, Properties options) throws IOException {
+        writer.write("count="); writer.write(Integer.toString(_changes.length)); writer.write('\n');
+        for (int i = 0; i < _changes.length; i++) {
+            // Use the writer that pairs with History.readOneChange() in load():
+            // it emits the version line that readOneChange() skips, followed by
+            // the class name and the change data.
+            History.writeOneChange(writer, _changes[i], options);
+        }
+        writer.write("/ec/\n"); // end of change marker
+    }
+
+    /**
+     * Reads back what save() wrote: the "count=N" line, N changes through
+     * History.readOneChange(), and the end marker line.
+     */
+    static public Change load(LineNumberReader reader, Pool pool) throws Exception {
+        String line = reader.readLine();
+        if (line == null) line = "";
+        int equal = line.indexOf('=');
+
+        assert "count".equals(line.substring(0, equal));
+
+        int count = Integer.parseInt(line.substring(equal + 1));
+        Change[] changes = new Change[count];
+
+        for (int i = 0; i < count; i++) {
+            changes[i] = History.readOneChange(reader, pool);
+        }
+
+        line = reader.readLine();
+        assert "/ec/".equals(line);
+
+        return new ChangeSequence(changes);
+    }
+}
diff --git a/main/src/com/google/refine/history/History.java b/main/src/com/google/refine/history/History.java
new file mode 100644
index 000000000..25df89cc8
--- /dev/null
+++ b/main/src/com/google/refine/history/History.java
@@ -0,0 +1,278 @@
+package com.google.refine.history;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONWriter;
+
+import com.google.refine.GridworksServlet;
+import com.google.refine.Jsonizable;
+import com.google.refine.ProjectManager;
+import com.google.refine.model.Project;
+import com.google.refine.util.Pool;
+
+/**
+ * Track done and undone changes. Done changes can be undone; undone changes can be redone.
+ * Each change is actually not tracked directly but through a history entry. The history
+ * entry stores only the metadata, while the change object stores the actual data. Thus
+ * the history entries are much smaller and can be kept in memory, while the change objects
+ * are only loaded into memory on demand.
+ */
+public class History implements Jsonizable {
+    static public Change readOneChange(InputStream in, Pool pool) throws Exception {
+        LineNumberReader reader = new LineNumberReader(new InputStreamReader(in));
+        try {
+            return readOneChange(reader, pool);
+        } finally {
+            reader.close();
+        }
+    }
+
+    static public Change readOneChange(LineNumberReader reader, Pool pool) throws Exception {
+        /* String version = */ reader.readLine();
+
+        String className = reader.readLine();
+        Class<? extends Change> klass = getChangeClass(className);
+
+        Method load = klass.getMethod("load", LineNumberReader.class, Pool.class);
+
+        return (Change) load.invoke(null, reader, pool);
+    }
+
+    static public void writeOneChange(OutputStream out, Change change, Pool pool) throws IOException {
+        Writer writer = new OutputStreamWriter(out);
+        try {
+            History.writeOneChange(writer, change, pool);
+        } finally {
+            writer.flush();
+        }
+    }
+
+    static public void writeOneChange(Writer writer, Change change, Pool pool) throws IOException {
+        Properties options = new Properties();
+        options.setProperty("mode", "save");
+        options.put("pool", pool);
+
+        writeOneChange(writer, change, options);
+    }
+
+    static public void writeOneChange(Writer writer, Change change, Properties options) throws IOException {
+        writer.write(GridworksServlet.getVersion()); writer.write('\n');
+        writer.write(change.getClass().getName()); writer.write('\n');
+
+        change.save(writer, options);
+    }
+
+    @SuppressWarnings("unchecked")
+    static public Class<? extends Change> getChangeClass(String className) throws ClassNotFoundException {
+        return (Class<? extends Change>) GridworksServlet.getClass(className);
+    }
+
+    protected long _projectID;
+    protected List<HistoryEntry> _pastEntries; // done changes, can be undone
+    protected List<HistoryEntry> _futureEntries; // undone changes, can be redone
+
+    public History(Project project) {
+        _projectID = project.id;
+        _pastEntries = new ArrayList<HistoryEntry>();
+        _futureEntries = new ArrayList<HistoryEntry>();
+    }
+
+    /**
+     * Adds a HistoryEntry to the list of past histories
+     * Adding a new entry clears all currently held future histories
+     * @param entry
+     */
+    synchronized public void addEntry(HistoryEntry entry) {
+        entry.apply(ProjectManager.singleton.getProject(_projectID));
+        _pastEntries.add(entry);
+
+        setModified();
+
+        // Any new change will clear all future entries.
+        List<HistoryEntry> futureEntries = _futureEntries;
+        _futureEntries = new ArrayList<HistoryEntry>();
+
+        for (HistoryEntry entry2 : futureEntries) {
+            try {
+                // remove residual data on disk
+                entry2.delete();
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+    protected void setModified() {
+        ProjectManager.singleton.getProjectMetadata(_projectID).updateModified();
+    }
+
+    synchronized public List<HistoryEntry> getLastPastEntries(int count) { // all entries when count <= 0
+        if (count <= 0) {
+            return new LinkedList<HistoryEntry>(_pastEntries);
+        }
+        // Return a copy: a live subList view fails once _pastEntries is structurally modified.
+        return new ArrayList<HistoryEntry>(_pastEntries.subList(Math.max(_pastEntries.size() - count, 0), _pastEntries.size()));
+    }
+
+    synchronized public void undoRedo(long lastDoneEntryID) {
+        if (lastDoneEntryID == 0) {
+            // undo all the way back to the start of the project
+            undo(_pastEntries.size());
+        } else {
+            for (int i = 0; i < _pastEntries.size(); i++) {
+                if (_pastEntries.get(i).id == lastDoneEntryID) {
+                    undo(_pastEntries.size() - i - 1);
+                    return;
+                }
+            }
+
+            for (int i = 0; i < _futureEntries.size(); i++) {
+                if (_futureEntries.get(i).id == lastDoneEntryID) {
+                    redo(i + 1);
+                    return;
+                }
+            }
+        }
+    }
+
+    synchronized public long getPrecedingEntryID(long entryID) {
+        if (entryID == 0) {
+            return -1;
+        } else {
+            for (int i = 0; i < _pastEntries.size(); i++) {
+                if (_pastEntries.get(i).id == entryID) {
+                    return i == 0 ? 0 : _pastEntries.get(i - 1).id;
+                }
+            }
+
+            for (int i = 0; i < _futureEntries.size(); i++) {
+                if (_futureEntries.get(i).id == entryID) {
+                    if (i > 0) {
+                        return _futureEntries.get(i - 1).id;
+                    } else if (_pastEntries.size() > 0) {
+                        return _pastEntries.get(_pastEntries.size() - 1).id;
+                    } else {
+                        return 0;
+                    }
+                }
+            }
+        }
+        return -1;
+    }
+
+    protected HistoryEntry getEntry(long entryID) { // returns null when the ID is in neither list
+        for (int i = 0; i < _pastEntries.size(); i++) {
+            if (_pastEntries.get(i).id == entryID) {
+                return _pastEntries.get(i);
+            }
+        }
+
+        for (int i = 0; i < _futureEntries.size(); i++) {
+            if (_futureEntries.get(i).id == entryID) {
+                return _futureEntries.get(i);
+            }
+        }
+        return null;
+    }
+
+    protected void undo(int times) { // moves up to `times` entries from the end of past to the front of future
+        Project project = ProjectManager.singleton.getProject(_projectID);
+
+        while (times > 0 && _pastEntries.size() > 0) {
+            HistoryEntry entry = _pastEntries.get(_pastEntries.size() - 1);
+
+            entry.revert(project);
+
+            setModified();
+            times--;
+
+            _pastEntries.remove(_pastEntries.size() - 1);
+            _futureEntries.add(0, entry);
+        }
+    }
+
+    protected void redo(int times) { // moves up to `times` entries from the front of future to the end of past
+        Project project = ProjectManager.singleton.getProject(_projectID);
+
+        while (times > 0 && _futureEntries.size() > 0) {
+            HistoryEntry entry = _futureEntries.get(0);
+
+            entry.apply(project);
+
+            setModified();
+            times--;
+
+            _pastEntries.add(entry);
+            _futureEntries.remove(0);
+        }
+    }
+
+    synchronized public void write(JSONWriter writer, Properties options)
+        throws JSONException {
+
+        writer.object();
+
+        writer.key("past"); writer.array();
+        for (HistoryEntry entry : _pastEntries) {
+            entry.write(writer, options);
+        }
+        writer.endArray();
+
+        writer.key("future"); writer.array();
+        for (HistoryEntry entry : _futureEntries) {
+            entry.write(writer, options);
+        }
+        writer.endArray();
+
+        writer.endObject();
+    }
+
+    synchronized public void save(Writer writer, Properties options) throws IOException {
+        writer.write("pastEntryCount="); writer.write(Integer.toString(_pastEntries.size())); writer.write('\n');
+        for (HistoryEntry entry : _pastEntries) {
+            entry.save(writer, options); writer.write('\n');
+        }
+
+        writer.write("futureEntryCount="); writer.write(Integer.toString(_futureEntries.size())); writer.write('\n');
+        for (HistoryEntry entry : _futureEntries) {
+            entry.save(writer, options); writer.write('\n');
+        }
+
+        writer.write("/e/\n");
+    }
+
+    synchronized public void load(Project project, LineNumberReader reader) throws Exception {
+        String line;
+        while ((line = reader.readLine()) != null && !"/e/".equals(line)) {
+            int equal = line.indexOf('=');
+            CharSequence field = line.subSequence(0, equal);
+            String value = line.substring(equal + 1);
+
+            if ("pastEntryCount".equals(field)) {
+                int count = Integer.parseInt(value);
+
+                for (int i = 0; i < count; i++) {
+                    _pastEntries.add(HistoryEntry.load(project, reader.readLine()));
+                }
+            } else if ("futureEntryCount".equals(field)) {
+                int count = Integer.parseInt(value);
+
+                for (int i = 0; i < count; i++) {
+                    _futureEntries.add(HistoryEntry.load(project, reader.readLine()));
+                }
+            }
+        }
+    }
+}
diff --git a/main/src/com/google/refine/history/HistoryEntry.java b/main/src/com/google/refine/history/HistoryEntry.java
new file mode 100644
index 000000000..3f1b81c21
--- /dev/null
+++ b/main/src/com/google/refine/history/HistoryEntry.java
@@ -0,0 +1,139 @@
+package com.google.refine.history;
+
+import java.io.Writer;
+import java.util.Date;
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+import com.google.refine.Jsonizable;
+import com.google.refine.ProjectManager;
+import com.google.refine.model.AbstractOperation;
+import com.google.refine.model.Project;
+import com.google.refine.operations.OperationRegistry;
+import com.google.refine.util.ParsingUtilities;
+
+/**
+ * This is the metadata of a Change. It's small, so we can load it in order to
+ * obtain information about a change without actually loading the change.
+ */ +public class HistoryEntry implements Jsonizable { + final public long id; + final public long projectID; + final public String description; + final public Date time; + + // the manager (deals with IO systems or databases etc.) + final public HistoryEntryManager _manager; + + // the abstract operation, if any, that results in the change + final public AbstractOperation operation; + + // the actual change, loaded on demand + private transient Change _change; + + private final static String OPERATION = "operation"; + + public void setChange(Change _change) { + this._change = _change; + } + + public Change getChange() { + return _change; + } + + static public long allocateID() { + return Math.round(Math.random() * 1000000) + System.currentTimeMillis(); + } + + public HistoryEntry(long id, Project project, String description, AbstractOperation operation, Change change) { + this.id = id; + this.projectID = project.id; + this.description = description; + this.operation = operation; + this.time = new Date(); + + this._manager = ProjectManager.singleton.getHistoryEntryManager(); + setChange(change); + } + + protected HistoryEntry(long id, long projectID, String description, AbstractOperation operation, Date time) { + this.id = id; + this.projectID = projectID; + this.description = description; + this.operation = operation; + this.time = time; + this._manager = ProjectManager.singleton.getHistoryEntryManager(); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(id); + writer.key("description"); writer.value(description); + writer.key("time"); writer.value(ParsingUtilities.dateToString(time)); + if ("save".equals(options.getProperty("mode")) && operation != null) { + writer.key(OPERATION); operation.write(writer, options); + } + writer.endObject(); + } + + public void save(Writer writer, Properties options){ + _manager.save(this, writer, options); + } + + public void 
apply(Project project) { + if (getChange() == null) { + ProjectManager.singleton.getHistoryEntryManager().loadChange(this); + } + + synchronized (project) { + getChange().apply(project); + + // When a change is applied, it can hang on to old data (in order to be able + // to revert later). Hence, we need to save the change out. + + try { + _manager.saveChange(this); + } catch (Exception e) { + e.printStackTrace(); + + getChange().revert(project); + + throw new RuntimeException("Failed to apply change", e); + } + } + } + + public void revert(Project project) { + if (getChange() == null) { + _manager.loadChange(this); + } + getChange().revert(project); + } + + static public HistoryEntry load(Project project, String s) throws Exception { + JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(s); + + AbstractOperation operation = null; + if (obj.has(OPERATION) && !obj.isNull(OPERATION)) { + operation = OperationRegistry.reconstruct(project, obj.getJSONObject(OPERATION)); + } + + return new HistoryEntry( + obj.getLong("id"), + project.id, + obj.getString("description"), + operation, + ParsingUtilities.stringToDate(obj.getString("time")) + ); + } + + public void delete(){ + _manager.delete(this); + } + +} diff --git a/main/src/com/google/refine/history/HistoryEntryManager.java b/main/src/com/google/refine/history/HistoryEntryManager.java new file mode 100644 index 000000000..0af966826 --- /dev/null +++ b/main/src/com/google/refine/history/HistoryEntryManager.java @@ -0,0 +1,12 @@ +package com.google.refine.history; + +import java.io.Writer; +import java.util.Properties; + + +public interface HistoryEntryManager { + public void loadChange(HistoryEntry historyEntry); + public void saveChange(HistoryEntry historyEntry) throws Exception; + public void save(HistoryEntry historyEntry, Writer writer, Properties options); + public void delete(HistoryEntry historyEntry); +} diff --git a/main/src/com/google/refine/history/HistoryProcess.java 
b/main/src/com/google/refine/history/HistoryProcess.java
new file mode 100644
index 000000000..24cc9783a
--- /dev/null
+++ b/main/src/com/google/refine/history/HistoryProcess.java
@@ -0,0 +1,90 @@
+package com.google.refine.history;
+
+import java.util.Properties;
+
+import org.json.JSONException;
+import org.json.JSONWriter;
+
+import com.google.refine.model.Project;
+import com.google.refine.process.Process;
+import com.google.refine.process.ProcessManager;
+
+/**
+ * The process for undoing or redoing. This involves calling apply() and revert()
+ * on changes.
+ */
+public class HistoryProcess extends Process {
+    final protected Project _project;
+    final protected long _lastDoneID;
+    final protected String _description;
+
+    protected boolean _done = false;
+
+    private final static String WARN = "Not a long-running process";
+
+    /**
+     * @param project    project whose history is rewound or replayed
+     * @param lastDoneID ID of the entry that should end up last in the done
+     *                   list, or 0 to undo everything
+     * @throws IllegalArgumentException if lastDoneID is non-zero and the
+     *                   project's history has no entry with that ID
+     */
+    public HistoryProcess(Project project, long lastDoneID) {
+        _project = project;
+        _lastDoneID = lastDoneID;
+
+        if (_lastDoneID == 0) {
+            _description = "Undo all";
+        } else {
+            HistoryEntry entry = _project.history.getEntry(_lastDoneID);
+            if (entry == null) {
+                // getEntry() returns null for an unknown ID; report that
+                // instead of failing with a bare NullPointerException below.
+                throw new IllegalArgumentException("No history entry with ID " + _lastDoneID);
+            }
+            _description = "Undo/redo until after " + entry.description;
+        }
+    }
+
+    public void cancel() {
+        throw new RuntimeException(WARN);
+    }
+
+    public boolean isImmediate() {
+        return true;
+    }
+
+    /**
+     * Performs the undo/redo synchronously and marks this process done.
+     *
+     * @return always null
+     */
+    public HistoryEntry performImmediate() {
+        _project.history.undoRedo(_lastDoneID);
+        _done = true;
+
+        return null;
+    }
+
+    public void startPerforming(ProcessManager manager) {
+        throw new RuntimeException(WARN);
+    }
+
+    public void write(JSONWriter writer, Properties options)
+        throws JSONException {
+
+        writer.object();
+        writer.key("description"); writer.value(_description);
+        writer.key("immediate"); writer.value(true);
+        writer.key("status"); writer.value(_done ? "done" : "pending");
+        writer.endObject();
+    }
+
+    public boolean isDone() {
+        throw new RuntimeException(WARN);
+    }
+
+    public boolean isRunning() {
+        throw new RuntimeException(WARN);
+    }
+}
diff --git a/main/src/com/google/refine/importers/ExcelImporter.java b/main/src/com/google/refine/importers/ExcelImporter.java
new file mode 100644
index 000000000..69a956492
--- /dev/null
+++ b/main/src/com/google/refine/importers/ExcelImporter.java
@@ -0,0 +1,304 @@
+package com.google.refine.importers;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.poi.common.usermodel.Hyperlink;
+import org.apache.poi.hssf.usermodel.HSSFDateUtil;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+import com.google.refine.ProjectMetadata;
+import com.google.refine.model.Cell;
+import com.google.refine.model.Column;
+import com.google.refine.model.Project;
+import com.google.refine.model.Recon;
+import com.google.refine.model.ReconCandidate;
+import com.google.refine.model.Row;
+import com.google.refine.model.Recon.Judgment;
+
+public class ExcelImporter implements StreamImporter {
+    protected boolean _xmlBased;
+
+    @Override
+    public void read(InputStream inputStream, Project project, ProjectMetadata metadata, Properties options) throws ImportException {
+        int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
+        int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
+        int limit = ImporterUtilities.getIntegerOption("limit", options, -1);
+        int skip = ImporterUtilities.getIntegerOption("skip", options,
0); + + Workbook wb = null; + try { + wb = _xmlBased ? + new XSSFWorkbook(inputStream) : + new HSSFWorkbook(new POIFSFileSystem(inputStream)); + } catch (IOException e) { + throw new ImportException( + "Attempted to parse file as Excel file but failed. " + + "Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.", + e + ); + } + + Sheet sheet = wb.getSheetAt(0); + + int firstRow = sheet.getFirstRowNum(); + int lastRow = sheet.getLastRowNum(); + + List columnNames = new ArrayList(); + Set columnNameSet = new HashSet(); + Map columnRootNameToIndex = new HashMap(); + + int rowsWithData = 0; + Map reconMap = new HashMap(); + + for (int r = firstRow; r <= lastRow; r++) { + org.apache.poi.ss.usermodel.Row row = sheet.getRow(r); + if (row == null) { + continue; + } else if (ignoreLines > 0) { + ignoreLines--; + continue; + } + + short firstCell = row.getFirstCellNum(); + short lastCell = row.getLastCellNum(); + if (firstCell < 0 || firstCell > lastCell) { + continue; + } + + /* + * Still processing header lines + */ + if (headerLines > 0) { + headerLines--; + + for (int c = firstCell; c <= lastCell; c++) { + org.apache.poi.ss.usermodel.Cell cell = row.getCell(c); + if (cell != null) { + Serializable value = extractCell(cell); + String text = value != null ? value.toString() : null; + if (text != null && text.length() > 0) { + while (columnNames.size() < c + 1) { + columnNames.add(null); + } + + String existingName = columnNames.get(c); + String name = (existingName == null) ? 
text : (existingName + " " + text); + + columnNames.set(c, name); + } + } + } + + if (headerLines == 0) { + for (int i = 0; i < columnNames.size(); i++) { + String rootName = columnNames.get(i); + if (rootName == null) { + continue; + } + setUnduplicatedColumnName(rootName, columnNames, i, columnNameSet, columnRootNameToIndex); + } + } + + /* + * Processing data rows + */ + } else { + Row newRow = new Row(columnNames.size()); + boolean hasData = false; + + for (int c = firstCell; c <= lastCell; c++) { + org.apache.poi.ss.usermodel.Cell cell = row.getCell(c); + if (cell == null) { + continue; + } + + Cell ourCell = extractCell(cell, reconMap); + if (ourCell != null) { + while (columnNames.size() < c + 1) { + columnNames.add(null); + } + if (columnNames.get(c) == null) { + setUnduplicatedColumnName("Column", columnNames, c, columnNameSet, columnRootNameToIndex); + } + + newRow.setCell(c, ourCell); + hasData = true; + } + } + + if (hasData) { + rowsWithData++; + + if (skip <= 0 || rowsWithData > skip) { + project.rows.add(newRow); + project.columnModel.setMaxCellIndex(newRow.cells.size()); + + if (limit > 0 && project.rows.size() >= limit) { + break; + } + } + } + } + } + + /* + * Create columns + */ + for (int c = 0; c < columnNames.size(); c++) { + String name = columnNames.get(c); + if (name != null) { + Column column = new Column(c, name); + project.columnModel.columns.add(column); + } + } + } + + protected void setUnduplicatedColumnName( + String rootName, List columnNames, int index, Set columnNameSet, Map columnRootNameToIndex) { + if (columnNameSet.contains(rootName)) { + int startIndex = columnRootNameToIndex.containsKey(rootName) ? 
columnRootNameToIndex.get(rootName) : 2; + while (true) { + String name = rootName + " " + startIndex; + if (columnNameSet.contains(name)) { + startIndex++; + } else { + columnNames.set(index, name); + columnNameSet.add(name); + break; + } + } + + columnRootNameToIndex.put(rootName, startIndex + 1); + } else { + columnNames.set(index, rootName); + columnNameSet.add(rootName); + } + } + + protected Serializable extractCell(org.apache.poi.ss.usermodel.Cell cell) { + int cellType = cell.getCellType(); + if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_ERROR || + cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BLANK) { + return null; + } + if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_FORMULA) { + cellType = cell.getCachedFormulaResultType(); + } + + Serializable value = null; + if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_BOOLEAN) { + value = cell.getBooleanCellValue(); + } else if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_NUMERIC) { + double d = cell.getNumericCellValue(); + + if (HSSFDateUtil.isCellDateFormatted(cell)) { + value = HSSFDateUtil.getJavaDate(d); + } else { + value = d; + } + } else { + String text = cell.getStringCellValue().trim(); + if (text.length() > 0) { + value = text; + } + } + + return value; + } + + protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map reconMap) { + Serializable value = extractCell(cell); + + if (value != null) { + Recon recon = null; + + Hyperlink hyperlink = cell.getHyperlink(); + if (hyperlink != null) { + String url = hyperlink.getAddress(); + + if (url.startsWith("http://") || + url.startsWith("https://")) { + + final String sig = "freebase.com/view"; + + int i = url.indexOf(sig); + if (i > 0) { + String id = url.substring(i + sig.length()); + + int q = id.indexOf('?'); + if (q > 0) { + id = id.substring(0, q); + } + int h = id.indexOf('#'); + if (h > 0) { + id = id.substring(0, h); + } + + if (reconMap.containsKey(id)) { + recon = reconMap.get(id); 
+ recon.judgmentBatchSize++; + } else { + recon = new Recon(0, null, null); + recon.service = "import"; + recon.match = new ReconCandidate(id, value.toString(), new String[0], 100); + recon.matchRank = 0; + recon.judgment = Judgment.Matched; + recon.judgmentAction = "auto"; + recon.judgmentBatchSize = 1; + recon.addCandidate(recon.match); + + reconMap.put(id, recon); + } + + } + } + } + + return new Cell(value, recon); + } else { + return null; + } + } + + @Override + public boolean canImportData(String contentType, String fileName) { + if (contentType != null) { + contentType = contentType.toLowerCase().trim(); + if ("application/msexcel".equals(contentType) || + "application/x-msexcel".equals(contentType) || + "application/x-ms-excel".equals(contentType) || + "application/vnd.ms-excel".equals(contentType) || + "application/x-excel".equals(contentType) || + "application/xls".equals(contentType)) { + this._xmlBased = false; + return true; + } else if("application/x-xls".equals(contentType)) { + this._xmlBased = true; + return true; + } + } else if (fileName != null) { + fileName = fileName.toLowerCase(); + if (fileName.endsWith(".xls")) { + this._xmlBased = false; + return true; + } else if (fileName.endsWith(".xlsx")) { + this._xmlBased = true; + return true; + } + } + return false; + } +} diff --git a/main/src/com/google/refine/importers/ImportException.java b/main/src/com/google/refine/importers/ImportException.java new file mode 100644 index 000000000..7b3054d8c --- /dev/null +++ b/main/src/com/google/refine/importers/ImportException.java @@ -0,0 +1,15 @@ +package com.google.refine.importers; + +/** + * Exception thrown by importers. Typically contains a nested exception + * indicating the underlying cause of the problem. 
+ */
+public class ImportException extends Exception {
+
+    private static final long serialVersionUID = 7077314805989174181L;
+
+    public ImportException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+}
diff --git a/main/src/com/google/refine/importers/Importer.java b/main/src/com/google/refine/importers/Importer.java
new file mode 100644
index 000000000..3b66bcf2c
--- /dev/null
+++ b/main/src/com/google/refine/importers/Importer.java
@@ -0,0 +1,14 @@
+package com.google.refine.importers;
+
+
+public interface Importer {
+
+    /**
+     * Determine whether importer can handle given contentType and filename.
+     *
+     * @param contentType
+     * @param fileName
+     * @return true if the importer can handle this
+     */
+    public boolean canImportData(String contentType, String fileName);
+}
diff --git a/main/src/com/google/refine/importers/ImporterRegistry.java b/main/src/com/google/refine/importers/ImporterRegistry.java
new file mode 100644
index 000000000..d9394137d
--- /dev/null
+++ b/main/src/com/google/refine/importers/ImporterRegistry.java
@@ -0,0 +1,116 @@
+package com.google.refine.importers;
+
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Static registry that maps importer names to importer instances.
+ */
+abstract public class ImporterRegistry {
+    final static Logger logger = LoggerFactory.getLogger("importer-registry");
+
+    static final private Map<String, Importer> importers = new HashMap<String, Importer>();
+
+    private static final String[][] importerNames = {
+        {"ExcelImporter", "com.google.refine.importers.ExcelImporter"},
+        {"XmlImporter", "com.google.refine.importers.XmlImporter"},
+        {"RdfTripleImporter", "com.google.refine.importers.RdfTripleImporter"},
+        {"MarcImporter", "com.google.refine.importers.MarcImporter"},
+        {"TsvCsvImporter", "com.google.refine.importers.TsvCsvImporter"}
+    };
+
+    static {
+        registerImporters(importerNames);
+    }
+
+    /**
+     * Instantiates and registers every importer listed in the given table.
+     *
+     * @param importers rows of {registry name, fully qualified class name}
+     * @return true only if every listed importer was loaded and registered
+     */
+    static public boolean registerImporters(String[][] importers) {
+        boolean status = true;
+        // Iterate the argument, not the built-in importerNames table, so that
+        // callers can register importers of their own.
+        for (String[] importer : importers) {
+            String importerName = importer[0];
+            String className = importer[1];
+            logger.debug("Loading importer " + importerName + " class: " + className);
+            Importer cmd;
+            try {
+                // TODO: May need to use the servlet container's class loader here
+                cmd = (Importer) Class.forName(className).newInstance();
+            } catch (InstantiationException e) {
+                logger.error("Failed to load importer class " + className, e);
+                status = false;
+                continue;
+            } catch (IllegalAccessException e) {
+                logger.error("Failed to load importer class " + className, e);
+                status = false;
+                continue;
+            } catch (ClassNotFoundException e) {
+                logger.error("Failed to load importer class " + className, e);
+                status = false;
+                continue;
+            }
+            // AND, not OR: one failed registration must make the result false,
+            // and a later success must not reset an earlier failure.
+            status &= registerImporter(importerName, cmd);
+        }
+        return status;
+    }
+
+    /**
+     * Register a single importer.
+     *
+     * @param name importer verb for importer
+     * @param importerObject object implementing the importer
+     *
+     * @return true if importer was loaded and registered successfully
+     */
+    static public boolean registerImporter(String name, Importer importerObject) {
+        if (importers.containsKey(name)) {
+            return false;
+        }
+        importers.put(name, importerObject);
+        return true;
+    }
+
+    // Currently only for test purposes
+    static protected boolean unregisterImporter(String verb) {
+        return importers.remove(verb) != null;
+    }
+
+    static public Importer guessImporter(String contentType, String fileName, boolean provideDefault) {
+        for (Importer i : importers.values()){
+            if(i.canImportData(contentType, fileName)){
+                return i;
+            }
+        }
+        if (provideDefault) {
+            return new TsvCsvImporter(); // default
+        } else {
+            return null;
+        }
+    }
+
+    static public Importer guessImporter(String contentType, String filename) {
+        return guessImporter(contentType, filename, true);
+    }
+
+    static public Importer guessUrlImporter(URL url) {
+        for (Importer importer : importers.values()){
+            if (importer instanceof UrlImporter
+                    && ((UrlImporter) importer).canImportData(url))
{ + return importer; + } + } + return null; + } +} diff --git a/main/src/com/google/refine/importers/ImporterUtilities.java b/main/src/com/google/refine/importers/ImporterUtilities.java new file mode 100644 index 000000000..6c5b9bac0 --- /dev/null +++ b/main/src/com/google/refine/importers/ImporterUtilities.java @@ -0,0 +1,111 @@ +package com.google.refine.importers; + +import java.io.Serializable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class ImporterUtilities { + + static public Serializable parseCellValue(String text) { + if (text.length() > 0) { + if (text.length() > 1 && text.startsWith("\"") && text.endsWith("\"")) { + return text.substring(1, text.length() - 1); + } + + try { + return Long.parseLong(text); + } catch (NumberFormatException e) { + } + + try { + double d = Double.parseDouble(text); + if (!Double.isInfinite(d) && !Double.isNaN(d)) { + return d; + } + } catch (NumberFormatException e) { + } + text = text.trim(); + } + return text; + } + + static public int getIntegerOption(String name, Properties options, int def) { + int value = def; + if (options.containsKey(name)) { + String s = options.getProperty(name); + try { + value = Integer.parseInt(s); + } catch (Exception e) { + } + } + return value; + } + + static public boolean getBooleanOption(String name, Properties options, boolean def) { + boolean value = def; + if (options.containsKey(name)) { + String s = options.getProperty(name); + try { + value = s.equalsIgnoreCase("on") || s.equals("1") || Boolean.parseBoolean(s); + } catch (Exception e) { + } + } + return value; + } + + static public void appendColumnName(List columnNames, int index, String name) { + name = name.trim(); + + while (columnNames.size() <= index) { + columnNames.add(""); + } + + if (!name.isEmpty()) { + String oldName = 
columnNames.get(index); + if (!oldName.isEmpty()) { + name = oldName + " " + name; + } + + columnNames.set(index, name); + } + } + + static public void ensureColumnsInRowExist(List columnNames, Row row) { + int count = row.cells.size(); + while (count > columnNames.size()) { + columnNames.add(""); + } + } + + static public void setupColumns(Project project, List columnNames) { + Map nameToIndex = new HashMap(); + for (int c = 0; c < columnNames.size(); c++) { + String cell = columnNames.get(c).trim(); + if (cell.isEmpty()) { + cell = "Column"; + } else if (cell.startsWith("\"") && cell.endsWith("\"")) { + cell = cell.substring(1, cell.length() - 1).trim(); //FIXME is trimming quotation marks appropriate? + } + + if (nameToIndex.containsKey(cell)) { + int index = nameToIndex.get(cell); + nameToIndex.put(cell, index + 1); + + cell = cell.contains(" ") ? (cell + " " + index) : (cell + index); + } else { + nameToIndex.put(cell, 2); + } + + Column column = new Column(c, cell); + + project.columnModel.columns.add(column); + } + } + +} diff --git a/main/src/com/google/refine/importers/MarcImporter.java b/main/src/com/google/refine/importers/MarcImporter.java new file mode 100644 index 000000000..0dc860a38 --- /dev/null +++ b/main/src/com/google/refine/importers/MarcImporter.java @@ -0,0 +1,108 @@ +package com.google.refine.importers; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Properties; + +import org.marc4j.MarcPermissiveStreamReader; +import org.marc4j.MarcWriter; +import org.marc4j.MarcXmlWriter; +import org.marc4j.marc.Record; + +import com.google.refine.ProjectMetadata; +import com.google.refine.model.Project; + +public class MarcImporter implements StreamImporter { + + @Override + public void read( + InputStream inputStream, + Project project, + ProjectMetadata metadata, 
Properties options + ) throws ImportException { + int limit = ImporterUtilities.getIntegerOption("limit",options,-1); + int skip = ImporterUtilities.getIntegerOption("skip",options,0); + + File tempFile; + try { + tempFile = File.createTempFile("gridworks-import-", ".marc.xml"); + } catch (IOException e) { + throw new ImportException("Unexpected error creating temp file",e); + } + try { + OutputStream os = new FileOutputStream(tempFile); + try { + MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader( + inputStream, + true, + true + ); + MarcWriter writer = new MarcXmlWriter(os, true); + + int count = 0; + while (reader.hasNext()) { + Record record = reader.next(); + if (skip <= 0) { + if (limit == -1 || count < limit) { + writer.write(record); + count++; + } else { + break; + } + } else { + skip--; + } + } + writer.close(); + } finally { + try { + os.close(); + } catch (IOException e) { + // Just ignore - not much we can do anyway + } + } + + InputStream is = new FileInputStream(tempFile); + try { + new XmlImporter().read(is, project, metadata, options); + } finally { + try { + is.close(); + } catch (IOException e) { + // Just ignore - not much we can do anyway + } + } + } catch (FileNotFoundException e) { + throw new ImportException("Input file not found", e); + } finally { + tempFile.delete(); + } + } + + @Override + public boolean canImportData(String contentType, String fileName) { + if (contentType != null) { + contentType = contentType.toLowerCase().trim(); + + if ("application/marc".equals(contentType)) { + return true; + } + } else if (fileName != null) { + fileName = fileName.toLowerCase(); + if ( + fileName.endsWith(".mrc") || + fileName.endsWith(".marc") || + fileName.contains(".mrc.") || + fileName.contains(".marc.") + ) { + return true; + } + } + return false; + } +} diff --git a/main/src/com/google/refine/importers/RdfTripleImporter.java b/main/src/com/google/refine/importers/RdfTripleImporter.java new file mode 100644 index 
000000000..d87bd6a21 --- /dev/null +++ b/main/src/com/google/refine/importers/RdfTripleImporter.java @@ -0,0 +1,142 @@ +package com.google.refine.importers; + +import java.io.IOException; +import java.io.Reader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Map.Entry; + +import org.jrdf.JRDFFactory; +import org.jrdf.SortedMemoryJRDFFactory; +import org.jrdf.collection.MemMapFactory; +import org.jrdf.graph.Graph; +import org.jrdf.graph.Triple; +import org.jrdf.parser.ParseException; +import org.jrdf.parser.StatementHandlerException; +import org.jrdf.parser.line.GraphLineParser; +import org.jrdf.parser.line.LineHandler; +import org.jrdf.parser.ntriples.NTriplesParserFactory; +import org.jrdf.util.ClosableIterable; +import static org.jrdf.graph.AnyObjectNode.ANY_OBJECT_NODE; +import static org.jrdf.graph.AnyPredicateNode.ANY_PREDICATE_NODE; +import static org.jrdf.graph.AnySubjectNode.ANY_SUBJECT_NODE; + +import com.google.refine.ProjectMetadata; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.ModelException; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class RdfTripleImporter implements ReaderImporter{ + private JRDFFactory _jrdfFactory; + private NTriplesParserFactory _nTriplesParserFactory; + private MemMapFactory _newMapFactory; + + public RdfTripleImporter(){ + _jrdfFactory = SortedMemoryJRDFFactory.getFactory(); + _nTriplesParserFactory = new NTriplesParserFactory(); + _newMapFactory = new MemMapFactory(); + } + + @Override + public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) throws ImportException { + String baseUrl = options.getProperty("base-url"); + + Graph graph = _jrdfFactory.getNewGraph(); + LineHandler lineHandler = _nTriplesParserFactory.createParser(graph, 
_newMapFactory); + GraphLineParser parser = new GraphLineParser(graph, lineHandler); + try { + parser.parse(reader, baseUrl); // fills JRDF graph + } catch (IOException e) { + throw new ImportException("i/o error while parsing RDF",e); + } catch (ParseException e) { + throw new ImportException("error parsing RDF",e); + } catch (StatementHandlerException e) { + throw new ImportException("error parsing RDF",e); + } + + Map> subjectToRows = new HashMap>(); + + Column subjectColumn = new Column(0, "subject"); + project.columnModel.columns.add(0, subjectColumn); + project.columnModel.setKeyColumnIndex(0); + + ClosableIterable triples = graph.find(ANY_SUBJECT_NODE, ANY_PREDICATE_NODE, ANY_OBJECT_NODE); + try { + for (Triple triple : triples) { + String subject = triple.getSubject().toString(); + String predicate = triple.getPredicate().toString(); + String object = triple.getObject().toString(); + + Column column = project.columnModel.getColumnByName(predicate); + if (column == null) { + column = new Column(project.columnModel.allocateNewCellIndex(), predicate); + try { + project.columnModel.addColumn(-1, column, true); + } catch (ModelException e) { + // ignore + } + } + + int cellIndex = column.getCellIndex(); + if (subjectToRows.containsKey(subject)) { + List rows = subjectToRows.get(subject); + for (Row row : rows) { + if (!ExpressionUtils.isNonBlankData(row.getCellValue(cellIndex))) { + row.setCell(cellIndex, new Cell(object, null)); + object = null; + break; + } + } + + if (object != null) { + Row row = new Row(project.columnModel.getMaxCellIndex() + 1); + rows.add(row); + + row.setCell(cellIndex, new Cell(object, null)); + } + } else { + List rows = new ArrayList(); + subjectToRows.put(subject, rows); + + Row row = new Row(project.columnModel.getMaxCellIndex() + 1); + rows.add(row); + + row.setCell(subjectColumn.getCellIndex(), new Cell(subject, null)); + row.setCell(cellIndex, new Cell(object, null)); + } + } + + for (Entry> entry : subjectToRows.entrySet()) { + 
project.rows.addAll(entry.getValue()); + } + } finally { + triples.iterator().close(); + } + } + + + @Override + public boolean canImportData(String contentType, String fileName) { + if (contentType != null) { + contentType = contentType.toLowerCase().trim(); + + if("application/rdf+xml".equals(contentType)) { + return true; + } + } else if (fileName != null) { + fileName = fileName.toLowerCase(); + if ( + fileName.endsWith(".rdf")) { + return true; + } + } + return false; + } + +} diff --git a/main/src/com/google/refine/importers/ReaderImporter.java b/main/src/com/google/refine/importers/ReaderImporter.java new file mode 100644 index 000000000..6848e9eeb --- /dev/null +++ b/main/src/com/google/refine/importers/ReaderImporter.java @@ -0,0 +1,30 @@ +package com.google.refine.importers; + +import java.io.Reader; +import java.util.Properties; + +import com.google.refine.ProjectMetadata; +import com.google.refine.model.Project; + +/** + * Interface for importers which take a Reader as input. + */ +public interface ReaderImporter extends Importer { + + /** + * Read data from a input reader into project. + * + * @param reader + * reader to import data from. It is assumed to be positioned at + * the correct point and ready to go. 
+ * @param project + * project which will contain data + * @param metadata + * metadata of new project + * @param options + * set of properties with import options + * @throws ImportException + */ + public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) + throws ImportException; +} diff --git a/main/src/com/google/refine/importers/StreamImporter.java b/main/src/com/google/refine/importers/StreamImporter.java new file mode 100644 index 000000000..20098269f --- /dev/null +++ b/main/src/com/google/refine/importers/StreamImporter.java @@ -0,0 +1,21 @@ +package com.google.refine.importers; + +import java.io.InputStream; +import java.util.Properties; + +import com.google.refine.ProjectMetadata; +import com.google.refine.model.Project; + +public interface StreamImporter extends Importer { + + /** + * @param inputStream stream to be imported + * @param project project to import stream into + * @param metadata metadata of new project + * @param options + * @throws ImportException + */ + public void read(InputStream inputStream, Project project, + ProjectMetadata metadata, Properties options) throws ImportException; + +} diff --git a/main/src/com/google/refine/importers/TsvCsvImporter.java b/main/src/com/google/refine/importers/TsvCsvImporter.java new file mode 100644 index 000000000..a4599cfb4 --- /dev/null +++ b/main/src/com/google/refine/importers/TsvCsvImporter.java @@ -0,0 +1,206 @@ +package com.google.refine.importers; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.LineNumberReader; +import java.io.Reader; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; + +import au.com.bytecode.opencsv.CSVParser; + +import com.google.refine.ProjectMetadata; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Cell; +import 
com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class TsvCsvImporter implements ReaderImporter,StreamImporter { + + @Override + public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) throws ImportException { + boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true); + + String sep = options.getProperty("separator"); // auto-detect if not present + int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1); + int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1); + + int limit = ImporterUtilities.getIntegerOption("limit",options,-1); + int skip = ImporterUtilities.getIntegerOption("skip",options,0); + boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true); + boolean ignoreQuotes = ImporterUtilities.getBooleanOption("ignore-quotes", options, false); + + LineNumberReader lnReader = new LineNumberReader(reader); + + try { + read(lnReader, project, sep, + limit, skip, ignoreLines, headerLines, + guessValueType, splitIntoColumns, ignoreQuotes + ); + } catch (IOException e) { + throw new ImportException("Import failed",e); + } + } + + /** + * + * @param lnReader + * LineNumberReader used to read file or string contents + * @param project + * The project into which the parsed data will be added + * @param sep + * The character used to denote different the break between data points + * @param limit + * The maximum number of rows of data to import + * @param skip + * The number of initial data rows to skip + * @param ignoreLines + * The number of initial lines within the data source which should be ignored entirely + * @param headerLines + * The number of lines in the data source which describe each column + * @param guessValueType + * Whether the parser should try and guess the type of the value being parsed + * @param splitIntoColumns + * Whether the parser should try and split 
the data source into columns + * @param ignoreQuotes + * Quotation marks are ignored, and all separators and newlines treated as such regardless of whether they are within quoted values + * @throws IOException + */ + public void read(LineNumberReader lnReader, Project project, String sep, int limit, int skip, int ignoreLines, int headerLines, boolean guessValueType, boolean splitIntoColumns, boolean ignoreQuotes ) throws IOException{ + CSVParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ? + new CSVParser(sep.toCharArray()[0],//HACK changing string to char - won't work for multi-char separators. + CSVParser.DEFAULT_QUOTE_CHARACTER, + CSVParser.DEFAULT_ESCAPE_CHARACTER, + CSVParser.DEFAULT_STRICT_QUOTES, + CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, + ignoreQuotes) : null; + List columnNames = new ArrayList(); + String line = null; + int rowsWithData = 0; + + while ((line = lnReader.readLine()) != null) { + if (ignoreLines > 0) { + ignoreLines--; + continue; + } else if (StringUtils.isBlank(line)) { + continue; + } + + //guess separator + if (parser == null) { + int tab = line.indexOf('\t'); + if (tab >= 0) { + parser = new CSVParser('\t', + CSVParser.DEFAULT_QUOTE_CHARACTER, + CSVParser.DEFAULT_ESCAPE_CHARACTER, + CSVParser.DEFAULT_STRICT_QUOTES, + CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, + ignoreQuotes); + } else { + parser = new CSVParser(',', + CSVParser.DEFAULT_QUOTE_CHARACTER, + CSVParser.DEFAULT_ESCAPE_CHARACTER, + CSVParser.DEFAULT_STRICT_QUOTES, + CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE, + ignoreQuotes); + } + } + + + if (headerLines > 0) { + //column headers + headerLines--; + + ArrayList cells = getCells(line, parser, lnReader, splitIntoColumns); + + for (int c = 0; c < cells.size(); c++) { + String cell = cells.get(c).trim(); + //add column even if cell is blank + ImporterUtilities.appendColumnName(columnNames, c, cell); + } + } else { + //data + Row row = new Row(columnNames.size()); + + ArrayList cells = getCells(line, 
parser, lnReader, splitIntoColumns); + + if( cells != null && cells.size() > 0 ) + rowsWithData++; + + if (skip <=0 || rowsWithData > skip){ + //add parsed data to row + for(String s : cells){ + s = s.trim(); + if (ExpressionUtils.isNonBlankData(s)) { + Serializable value = guessValueType ? ImporterUtilities.parseCellValue(s) : s; + row.cells.add(new Cell(value, null)); + }else{ + row.cells.add(null); + } + } + project.rows.add(row); + project.columnModel.setMaxCellIndex(row.cells.size()); + + ImporterUtilities.ensureColumnsInRowExist(columnNames, row); + + if (limit > 0 && project.rows.size() >= limit) { + break; + } + } + } + } + + ImporterUtilities.setupColumns(project, columnNames); + } + + protected ArrayList getCells(String line, CSVParser parser, LineNumberReader lnReader, boolean splitIntoColumns) throws IOException{ + ArrayList cells = new ArrayList(); + if(splitIntoColumns){ + String[] tokens = parser.parseLineMulti(line); + for(String s : tokens){ + cells.add(s); + } + while(parser.isPending()){ + tokens = parser.parseLineMulti(lnReader.readLine()); + for(String s : tokens){ + cells.add(s); + } + } + }else{ + cells.add(line); + } + return cells; + } + + @Override + public void read(InputStream inputStream, Project project, + ProjectMetadata metadata, Properties options) throws ImportException { + read(new InputStreamReader(inputStream), project, metadata, options); + } + + @Override + public boolean canImportData(String contentType, String fileName) { + if (contentType != null) { + contentType = contentType.toLowerCase().trim(); + return + "text/plain".equals(contentType) || + "text/csv".equals(contentType) || + "text/x-csv".equals(contentType) || + "text/tab-separated-value".equals(contentType); + + } else if (fileName != null) { + fileName = fileName.toLowerCase(); + if (fileName.endsWith(".tsv")) { + return true; + }else if (fileName.endsWith(".csv")){ + return true; + } + } + return false; + } +} diff --git 
a/main/src/com/google/refine/importers/UrlImporter.java b/main/src/com/google/refine/importers/UrlImporter.java new file mode 100644 index 000000000..83bd07997 --- /dev/null +++ b/main/src/com/google/refine/importers/UrlImporter.java @@ -0,0 +1,15 @@ +package com.google.refine.importers; + +import java.net.URL; +import java.util.Properties; + +import com.google.refine.ProjectMetadata; +import com.google.refine.model.Project; + +public interface UrlImporter extends Importer { + + public void read(URL url, Project project, ProjectMetadata metadata, Properties options) throws Exception; + + public boolean canImportData(URL url); + +} diff --git a/main/src/com/google/refine/importers/XmlImportUtilities.java b/main/src/com/google/refine/importers/XmlImportUtilities.java new file mode 100644 index 000000000..e0cae4da2 --- /dev/null +++ b/main/src/com/google/refine/importers/XmlImportUtilities.java @@ -0,0 +1,633 @@ +package com.google.refine.importers; + +import java.io.InputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class XmlImportUtilities { + final static Logger logger = LoggerFactory.getLogger("XmlImporterUtilities"); + + /** + * An element which holds sub-elements we + * shall import as records + */ + static protected class RecordElementCandidate { + String[] path; + int count; + } + + /** + * + * + * + */ + static protected abstract class ImportVertical 
{ + public String name = ""; + public int nonBlankCount; + + abstract void tabulate(); + } + + /** + * A column group describes a branch in tree structured data + */ + static public class ImportColumnGroup extends ImportVertical { + public Map subgroups = new HashMap(); + public Map columns = new HashMap(); + public int nextRowIndex; + + @Override + void tabulate() { + for (ImportColumn c : columns.values()) { + c.tabulate(); + nonBlankCount = Math.max(nonBlankCount, c.nonBlankCount); + } + for (ImportColumnGroup g : subgroups.values()) { + g.tabulate(); + nonBlankCount = Math.max(nonBlankCount, g.nonBlankCount); + } + } + } + + /** + * A column is used to describe a branch-terminating element in a tree structure + * + */ + static public class ImportColumn extends ImportVertical { + public int cellIndex; + public int nextRowIndex; + public boolean blankOnFirstRow; + + public ImportColumn() {} + + public ImportColumn(String name) { //required for testing + super.name = name; + } + + @Override + void tabulate() { + // already done the tabulation elsewhere + } + } + + /** + * A record describes a data element in a tree-structure + * + */ + static public class ImportRecord { + public List> rows = new LinkedList>(); + } + + static public String[] detectPathFromTag(InputStream inputStream, String tag) { + try { + XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); + + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.START_ELEMENT) { + List path = detectRecordElement(parser, tag); + if (path != null) { + String[] path2 = new String[path.size()]; + + path.toArray(path2); + + return path2; + } + } + } + } catch (Exception e) { + // silent + // e.printStackTrace(); + } + + return null; + } + + /** + * Looks for an element with the given tag name in the Xml being parsed, returning the path hierarchy to reach it. 
+ * + * @param parser + * @param tag + * The Xml element name (can be qualified) to search for + * @return + * If the tag is found, an array of strings is returned. + * If the tag is at the top level, the tag will be the only item in the array. + * If the tag is nested beneath the top level, the array is filled with the hierarchy with the tag name at the last index + * Null if the the tag is not found. + * @throws XMLStreamException + */ + static protected List detectRecordElement(XMLStreamReader parser, String tag) throws XMLStreamException { + if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT) + parser.next(); + String localName = parser.getLocalName(); + String fullName = composeName(parser.getPrefix(), localName); + if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) { + List path = new LinkedList(); + path.add(localName); + + return path; + } + + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.END_ELEMENT) { + break; + } else if (eventType == XMLStreamConstants.START_ELEMENT) { + List path = detectRecordElement(parser, tag); + if (path != null) { + path.add(0, localName); + return path; + } + } + } + return null; + } + + /** + * Seeks for recurring XML element in an InputStream + * which are likely candidates for being data records + * @param inputStream + * The XML data as a stream + * @return + * The path to the most numerous of the possible candidates. 
+ * null if no candidates were found (less than 6 recurrences) + */ + static public String[] detectRecordElement(InputStream inputStream) { + logger.trace("detectRecordElement(inputStream)"); + List candidates = new ArrayList(); + + try { + XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); + + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.START_ELEMENT) { + RecordElementCandidate candidate = + detectRecordElement( + parser, + new String[] { parser.getLocalName() }); + + if (candidate != null) { + candidates.add(candidate); + } + } + } + } catch (Exception e) { + // silent + // e.printStackTrace(); + } + + if (candidates.size() > 0) { + sortRecordElementCandidates(candidates); + + return candidates.get(0).path; + } + logger.info("No candidate elements were found in Xml - at least 6 similar elements are required"); + return null; + } + + static protected RecordElementCandidate detectRecordElement(XMLStreamReader parser, String[] path) { + logger.trace("detectRecordElement(XMLStreamReader, String[])"); + List descendantCandidates = new ArrayList(); + + Map immediateChildCandidateMap = new HashMap(); + int textNodeCount = 0; + int childElementNodeCount = 0; + + try { + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.END_ELEMENT) { + break; + } else if (eventType == XMLStreamConstants.CHARACTERS) { + if (parser.getText().trim().length() > 0) { + textNodeCount++; + } + } else if (eventType == XMLStreamConstants.START_ELEMENT) { + childElementNodeCount++; + + String tagName = parser.getLocalName(); + + immediateChildCandidateMap.put( + tagName, + immediateChildCandidateMap.containsKey(tagName) ? 
+ immediateChildCandidateMap.get(tagName) + 1 : 1); + + String[] path2 = new String[path.length + 1]; + System.arraycopy(path, 0, path2, 0, path.length); + path2[path.length] = tagName; + + RecordElementCandidate c = detectRecordElement(parser, path2); + if (c != null) { + descendantCandidates.add(c); + } + } + } + } catch (Exception e) { + // silent + // e.printStackTrace(); + } + + if (textNodeCount > 0 && childElementNodeCount > 0) { + // This is a mixed element + return null; + } + + if (immediateChildCandidateMap.size() > 0) { + List immediateChildCandidates = new ArrayList(immediateChildCandidateMap.size()); + for (Entry entry : immediateChildCandidateMap.entrySet()) { + int count = entry.getValue(); + if (count > 1) { + String[] path2 = new String[path.length + 1]; + System.arraycopy(path, 0, path2, 0, path.length); + path2[path.length] = entry.getKey(); + + RecordElementCandidate candidate = new RecordElementCandidate(); + candidate.path = path2; + candidate.count = count; + immediateChildCandidates.add(candidate); + } + } + + if (immediateChildCandidates.size() > 0 && immediateChildCandidates.size() < 5) { + // There are some promising immediate child elements, but not many, + // that can serve as record elements. 
+ + sortRecordElementCandidates(immediateChildCandidates); + + RecordElementCandidate ourCandidate = immediateChildCandidates.get(0); + logger.trace("ourCandidate.count : " + ourCandidate.count + "; immediateChildCandidates.size() : " + immediateChildCandidates.size()); + if (ourCandidate.count / immediateChildCandidates.size() > 5) { + return ourCandidate; + } + + descendantCandidates.add(ourCandidate); + } + } + + if (descendantCandidates.size() > 0) { + sortRecordElementCandidates(descendantCandidates); + + RecordElementCandidate candidate = descendantCandidates.get(0); + if (candidate.count / descendantCandidates.size() > 5) { + return candidate; + } + } + + return null; + } + + static public void sortRecordElementCandidates(List list) { + Collections.sort(list, new Comparator() { + public int compare(RecordElementCandidate o1, RecordElementCandidate o2) { + return o2.count - o1.count; + } + }); + } + + static public void importXml( + InputStream inputStream, + Project project, + String[] recordPath, + ImportColumnGroup rootColumnGroup + ) { + try { + XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); + + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.START_ELEMENT) { + findRecord(project, parser, recordPath, 0, rootColumnGroup); + } + } + } catch (Exception e) { + e.printStackTrace(); + // silent + } + } + + static public void createColumnsFromImport( + Project project, + ImportColumnGroup columnGroup + ) { + int startColumnIndex = project.columnModel.columns.size(); + + List columns = new ArrayList(columnGroup.columns.values()); + Collections.sort(columns, new Comparator() { + public int compare(ImportColumn o1, ImportColumn o2) { + if (o1.blankOnFirstRow != o2.blankOnFirstRow) { + return o1.blankOnFirstRow ? 1 : -1; + } + + int c = o2.nonBlankCount - o1.nonBlankCount; + return c != 0 ? 
c : (o1.name.length() - o2.name.length()); + } + }); + + for (int i = 0; i < columns.size(); i++) { + ImportColumn c = columns.get(i); + + Column column = new com.google.refine.model.Column(c.cellIndex, c.name); + project.columnModel.columns.add(column); + } + + List subgroups = new ArrayList(columnGroup.subgroups.values()); + Collections.sort(subgroups, new Comparator() { + public int compare(ImportColumnGroup o1, ImportColumnGroup o2) { + int c = o2.nonBlankCount - o1.nonBlankCount; + return c != 0 ? c : (o1.name.length() - o2.name.length()); + } + }); + + for (ImportColumnGroup g : subgroups) { + createColumnsFromImport(project, g); + } + + int endColumnIndex = project.columnModel.columns.size(); + int span = endColumnIndex - startColumnIndex; + if (span > 1 && span < project.columnModel.columns.size()) { + project.columnModel.addColumnGroup(startColumnIndex, span, startColumnIndex); + } + } + + /** + * + * @param project + * @param parser + * @param recordPath + * @param pathIndex + * @param rootColumnGroup + * @throws XMLStreamException + */ + static protected void findRecord( + Project project, + XMLStreamReader parser, + String[] recordPath, + int pathIndex, + ImportColumnGroup rootColumnGroup + ) throws XMLStreamException { + if(parser.getEventType() == XMLStreamConstants.START_DOCUMENT){ + logger.warn("Cannot use findRecord method for START_DOCUMENT event"); + return; + } + String tagName = parser.getLocalName(); + if (tagName.equals(recordPath[pathIndex])) { + if (pathIndex < recordPath.length - 1) { + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.START_ELEMENT) { + findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup); + } else if (eventType == XMLStreamConstants.END_ELEMENT) { + break; + } + } + } else { + processRecord(project, parser, rootColumnGroup); + } + } else { + skip(parser); + } + } + + static protected void skip(XMLStreamReader parser) throws XMLStreamException { + while 
(parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.START_ELEMENT) { + skip(parser); + } else if (eventType == XMLStreamConstants.END_ELEMENT) { + return; + } + } + } + + /** + * processRecord parsesXml for a single element and it's sub-elements, + * adding the parsed data as a row to the project + * @param project + * @param parser + * @param rootColumnGroup + * @throws XMLStreamException + */ + static protected void processRecord( + Project project, + XMLStreamReader parser, + ImportColumnGroup rootColumnGroup + ) throws XMLStreamException { + ImportRecord record = new ImportRecord(); + + processSubRecord(project, parser, rootColumnGroup, record); + + if (record.rows.size() > 0) { + for (List row : record.rows) { + Row realRow = new Row(row.size()); + int cellCount = 0; + + for (int c = 0; c < row.size(); c++) { + Cell cell = row.get(c); + if (cell != null) { + realRow.setCell(c, cell); + cellCount++; + } + } + + if (cellCount > 0) { + project.rows.add(realRow); + } + } + } + } + + static protected String composeName(String prefix, String localName) { + return prefix != null && prefix.length() > 0 ? 
(prefix + ":" + localName) : localName; + } + + /** + * + * @param project + * @param parser + * @param columnGroup + * @param record + * @throws XMLStreamException + */ + static protected void processSubRecord( + Project project, + XMLStreamReader parser, + ImportColumnGroup columnGroup, + ImportRecord record + ) throws XMLStreamException { + ImportColumnGroup thisColumnGroup = getColumnGroup( + project, + columnGroup, + composeName(parser.getPrefix(), parser.getLocalName())); + + thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex); + + int attributeCount = parser.getAttributeCount(); + for (int i = 0; i < attributeCount; i++) { + String text = parser.getAttributeValue(i).trim(); + if (text.length() > 0) { + addCell( + project, + thisColumnGroup, + record, + composeName(parser.getAttributePrefix(i), parser.getAttributeLocalName(i)), + text + ); + } + } + + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.START_ELEMENT) { + processSubRecord( + project, + parser, + thisColumnGroup, + record + ); + } else if (//eventType == XMLStreamConstants.CDATA || + eventType == XMLStreamConstants.CHARACTERS) { + String text = parser.getText().trim(); + if (text.length() > 0) { + addCell( + project, + thisColumnGroup, + record, + null, + parser.getText() + ); + } + } else if (eventType == XMLStreamConstants.END_ELEMENT) { + break; + } + } + + int nextRowIndex = thisColumnGroup.nextRowIndex; + for (ImportColumn column2 : thisColumnGroup.columns.values()) { + nextRowIndex = Math.max(nextRowIndex, column2.nextRowIndex); + } + for (ImportColumnGroup columnGroup2 : thisColumnGroup.subgroups.values()) { + nextRowIndex = Math.max(nextRowIndex, columnGroup2.nextRowIndex); + } + thisColumnGroup.nextRowIndex = nextRowIndex; + } + + static protected void addCell( + Project project, + ImportColumnGroup columnGroup, + ImportRecord record, + String columnLocalName, + String text + ) { + if (text == 
null || ((String) text).isEmpty()) { + return; + } + + Serializable value = ImporterUtilities.parseCellValue(text); + + ImportColumn column = getColumn(project, columnGroup, columnLocalName); + int cellIndex = column.cellIndex; + + int rowIndex = Math.max(columnGroup.nextRowIndex, column.nextRowIndex); + while (rowIndex >= record.rows.size()) { + record.rows.add(new ArrayList()); + } + + List row = record.rows.get(rowIndex); + while (cellIndex >= row.size()) { + row.add(null); + } + + logger.trace("Adding cell with value : \"" + value + "\" to row : " + rowIndex + " at cell index : " + (cellIndex-1)); + + row.set(cellIndex, new Cell(value, null)); + + column.nextRowIndex = rowIndex + 1; + column.nonBlankCount++; + } + + static protected ImportColumn getColumn( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + if (columnGroup.columns.containsKey(localName)) { + return columnGroup.columns.get(localName); + } + + ImportColumn column = createColumn(project, columnGroup, localName); + columnGroup.columns.put(localName, column); + + return column; + } + + static protected ImportColumn createColumn( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + ImportColumn newColumn = new ImportColumn(); + + newColumn.name = + columnGroup.name.length() == 0 ? + (localName == null ? "Text" : localName) : + (localName == null ? 
columnGroup.name : (columnGroup.name + " - " + localName)); + + newColumn.cellIndex = project.columnModel.allocateNewCellIndex(); + newColumn.nextRowIndex = columnGroup.nextRowIndex; + + return newColumn; + } + + static protected ImportColumnGroup getColumnGroup( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + if (columnGroup.subgroups.containsKey(localName)) { + return columnGroup.subgroups.get(localName); + } + + ImportColumnGroup subgroup = createColumnGroup(project, columnGroup, localName); + columnGroup.subgroups.put(localName, subgroup); + + return subgroup; + } + + static protected ImportColumnGroup createColumnGroup( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + ImportColumnGroup newGroup = new ImportColumnGroup(); + + newGroup.name = + columnGroup.name.length() == 0 ? + (localName == null ? "Text" : localName) : + (localName == null ? columnGroup.name : (columnGroup.name + " - " + localName)); + + newGroup.nextRowIndex = columnGroup.nextRowIndex; + + return newGroup; + } +} diff --git a/main/src/com/google/refine/importers/XmlImporter.java b/main/src/com/google/refine/importers/XmlImporter.java new file mode 100644 index 000000000..344dcdc13 --- /dev/null +++ b/main/src/com/google/refine/importers/XmlImporter.java @@ -0,0 +1,91 @@ +package com.google.refine.importers; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; +import java.util.Properties; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.ProjectMetadata; +import com.google.refine.importers.XmlImportUtilities.ImportColumnGroup; +import com.google.refine.model.Project; + +public class XmlImporter implements StreamImporter { + + final static Logger logger = LoggerFactory.getLogger("XmlImporter"); + + public static final int BUFFER_SIZE = 64 * 1024; + + @Override + public void read( + InputStream inputStream, + 
Project project, + ProjectMetadata metadata, Properties options + ) throws ImportException { + logger.trace("XmlImporter.read"); + PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE); + + String[] recordPath = null; + { + byte[] buffer = new byte[BUFFER_SIZE]; + int bytes_read = 0; + try { + while (bytes_read < BUFFER_SIZE) { + int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read); + if (c == -1) break; + bytes_read +=c ; + } + pis.unread(buffer, 0, bytes_read); + } catch (IOException e) { + throw new ImportException("Read error",e); + } + + if (options.containsKey("importer-record-tag")) { + recordPath = XmlImportUtilities.detectPathFromTag( + new ByteArrayInputStream(buffer, 0, bytes_read), + options.getProperty("importer-record-tag")); + } else { + recordPath = XmlImportUtilities.detectRecordElement( + new ByteArrayInputStream(buffer, 0, bytes_read)); + } + } + + if (recordPath == null) + return; + + ImportColumnGroup rootColumnGroup = new ImportColumnGroup(); + + XmlImportUtilities.importXml(pis, project, recordPath, rootColumnGroup); + XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup); + + project.columnModel.update(); + } + + @Override + public boolean canImportData(String contentType, String fileName) { + if (contentType != null) { + contentType = contentType.toLowerCase().trim(); + + if("application/xml".equals(contentType) || + "text/xml".equals(contentType) || + "application/rss+xml".equals(contentType) || + "application/atom+xml".equals(contentType)) { + return true; + } + } else if (fileName != null) { + fileName = fileName.toLowerCase(); + if ( + fileName.endsWith(".xml") || + fileName.endsWith(".atom") || + fileName.endsWith(".rss") + ) { + return true; + } + } + return false; + } + +} diff --git a/main/src/com/google/refine/importers/parsers/NonSplitRowParser.java b/main/src/com/google/refine/importers/parsers/NonSplitRowParser.java new file mode 100644 index 000000000..346f2ea76 --- /dev/null +++ 
b/main/src/com/google/refine/importers/parsers/NonSplitRowParser.java @@ -0,0 +1,39 @@ +package com.google.refine.importers.parsers; + +import java.io.LineNumberReader; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.importers.ImporterUtilities; +import com.google.refine.model.Cell; +import com.google.refine.model.Row; + +public class NonSplitRowParser extends RowParser { + + public List split(String line, LineNumberReader lineReader) { + List results = new ArrayList(1); + + results.add(line.trim()); + + return results; + } + + public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) { + line = line.trim(); + if (line.isEmpty()) { + return false; + } else { + Serializable value = guessValueType ? ImporterUtilities.parseCellValue(line) : line; + if (ExpressionUtils.isNonBlankData(value)) { + row.cells.add(new Cell(value, null)); + return true; + } else { + row.cells.add(null); + return false; + } + } + } + +} diff --git a/main/src/com/google/refine/importers/parsers/RowParser.java b/main/src/com/google/refine/importers/parsers/RowParser.java new file mode 100644 index 000000000..bde474acd --- /dev/null +++ b/main/src/com/google/refine/importers/parsers/RowParser.java @@ -0,0 +1,12 @@ +package com.google.refine.importers.parsers; + +import java.io.LineNumberReader; +import java.util.List; + +import com.google.refine.model.Row; + +public abstract class RowParser { + public abstract List split(String line, LineNumberReader lineReader); + + public abstract boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader); +} diff --git a/main/src/com/google/refine/importers/parsers/SeparatorRowParser.java b/main/src/com/google/refine/importers/parsers/SeparatorRowParser.java new file mode 100644 index 000000000..9dc13948c --- /dev/null +++ 
b/main/src/com/google/refine/importers/parsers/SeparatorRowParser.java @@ -0,0 +1,52 @@ +package com.google.refine.importers.parsers; + +import java.io.LineNumberReader; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.lang.StringUtils; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.importers.ImporterUtilities; +import com.google.refine.model.Cell; +import com.google.refine.model.Row; + +public class SeparatorRowParser extends RowParser { + + String sep; + + public SeparatorRowParser(String sep) { + this.sep = sep; + } + + public List split(String line, LineNumberReader lineReader) { + String[] cells = StringUtils.splitPreserveAllTokens(line, sep); + + List results = new ArrayList(); + for (int c = 0; c < cells.length; c++) { + results.add(cells[c]); + } + + return results; + } + + public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) { + boolean hasData = false; + + String[] cells = StringUtils.splitPreserveAllTokens(line, sep); + for (int c = 0; c < cells.length; c++) { + String text = cells[c]; + + Serializable value = guessValueType ? 
ImporterUtilities.parseCellValue(text) : text; + if (ExpressionUtils.isNonBlankData(value)) { + row.cells.add(new Cell(value, null)); + hasData = true; + } else { + row.cells.add(null); + } + } + return hasData; + } + +} diff --git a/main/src/com/google/refine/io/FileHistoryEntryManager.java b/main/src/com/google/refine/io/FileHistoryEntryManager.java new file mode 100644 index 000000000..a475cc88c --- /dev/null +++ b/main/src/com/google/refine/io/FileHistoryEntryManager.java @@ -0,0 +1,109 @@ +package com.google.refine.io; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.InputStreamReader; +import java.util.Properties; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import java.util.zip.ZipOutputStream; +import java.io.Writer; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.ProjectManager; +import com.google.refine.history.History; +import com.google.refine.history.HistoryEntry; +import com.google.refine.history.HistoryEntryManager; +import com.google.refine.util.Pool; + + +public class FileHistoryEntryManager implements HistoryEntryManager{ + + public void delete(HistoryEntry historyEntry) { + File file = getChangeFile(historyEntry); + if (file.exists()) { + file.delete(); + } + } + + public void save(HistoryEntry historyEntry, Writer writer, Properties options) { + JSONWriter jsonWriter = new JSONWriter(writer); + try { + historyEntry.write(jsonWriter, options); + } catch (JSONException e) { + e.printStackTrace(); + } + } + + public void loadChange(HistoryEntry historyEntry) { + File changeFile = getChangeFile(historyEntry); + + try { + loadChange(historyEntry, changeFile); + } catch (Exception e) { + throw new RuntimeException("Failed to load change file " + changeFile.getAbsolutePath(), e); + } + } + + protected void loadChange(HistoryEntry historyEntry, File file) throws Exception { + ZipFile zipFile = new ZipFile(file); + try { + Pool pool = new Pool(); + ZipEntry poolEntry = 
zipFile.getEntry("pool.txt"); + if (poolEntry != null) { + pool.load(new InputStreamReader( + zipFile.getInputStream(poolEntry))); + } // else, it's a legacy project file + + historyEntry.setChange(History.readOneChange( + zipFile.getInputStream(zipFile.getEntry("change.txt")), pool)); + } finally { + zipFile.close(); + } + } + + public void saveChange(HistoryEntry historyEntry) throws Exception { + File changeFile = getChangeFile(historyEntry); + if (!(changeFile.exists())) { + saveChange(historyEntry, changeFile); + } + } + + protected void saveChange(HistoryEntry historyEntry, File file) throws Exception { + ZipOutputStream out = new ZipOutputStream(new FileOutputStream(file)); + try { + Pool pool = new Pool(); + + out.putNextEntry(new ZipEntry("change.txt")); + try { + History.writeOneChange(out, historyEntry.getChange(), pool); + } finally { + out.closeEntry(); + } + + out.putNextEntry(new ZipEntry("pool.txt")); + try { + pool.save(out); + } finally { + out.closeEntry(); + } + } finally { + out.close(); + } + } + + protected File getChangeFile(HistoryEntry historyEntry) { + return new File(getHistoryDir(historyEntry), historyEntry.id + ".change.zip"); + } + + protected File getHistoryDir(HistoryEntry historyEntry) { + File dir = new File(((FileProjectManager)ProjectManager.singleton) + .getProjectDir(historyEntry.projectID), + "history"); + dir.mkdirs(); + + return dir; + } +} \ No newline at end of file diff --git a/main/src/com/google/refine/io/FileProjectManager.java b/main/src/com/google/refine/io/FileProjectManager.java new file mode 100644 index 000000000..dca311a29 --- /dev/null +++ b/main/src/com/google/refine/io/FileProjectManager.java @@ -0,0 +1,367 @@ +package com.google.refine.io; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Properties; +import 
java.util.zip.GZIPInputStream; + +import org.apache.tools.tar.TarEntry; +import org.apache.tools.tar.TarInputStream; +import org.apache.tools.tar.TarOutputStream; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONTokener; +import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.history.HistoryEntryManager; +import com.google.refine.model.Project; +import com.google.refine.preference.TopList; + +public class FileProjectManager extends ProjectManager { + final static protected String s_projectDirNameSuffix = ".project"; + + protected File _workspaceDir; + + final static Logger logger = LoggerFactory.getLogger("FileProjectManager"); + + static public synchronized void initialize(File dir) { + if (singleton == null) { + logger.info("Using workspace directory: {}", dir.getAbsolutePath()); + singleton = new FileProjectManager(dir); + } + + } + + protected FileProjectManager(File dir) { + super(); + _workspaceDir = dir; + _workspaceDir.mkdirs(); + + load(); + recover(); + } + + public File getWorkspaceDir() { + return _workspaceDir; + } + + static public File getProjectDir(File workspaceDir, long projectID) { + File dir = new File(workspaceDir, projectID + s_projectDirNameSuffix); + if (!dir.exists()) { + dir.mkdir(); + } + return dir; + } + + public File getProjectDir(long projectID) { + return getProjectDir(_workspaceDir, projectID); + } + + /** + * Import an external project that has been received as a .tar file, expanded, and + * copied into our workspace directory. 
+ * + * @param projectID + */ + public boolean loadProjectMetadata(long projectID) { + synchronized (this) { + ProjectMetadata metadata = ProjectMetadataUtilities.load(getProjectDir(projectID)); + if (metadata != null) { + _projectsMetadata.put(projectID, metadata); + return true; + } else { + return false; + } + } + } + + public void importProject(long projectID, InputStream inputStream, boolean gziped) throws IOException { + File destDir = this.getProjectDir(projectID); + destDir.mkdirs(); + + if (gziped) { + GZIPInputStream gis = new GZIPInputStream(inputStream); + untar(destDir, gis); + } else { + untar(destDir, inputStream); + } + } + + protected void untar(File destDir, InputStream inputStream) throws IOException { + TarInputStream tin = new TarInputStream(inputStream); + TarEntry tarEntry = null; + + while ((tarEntry = tin.getNextEntry()) != null) { + File destEntry = new File(destDir, tarEntry.getName()); + File parent = destEntry.getParentFile(); + + if (!parent.exists()) { + parent.mkdirs(); + } + + if (tarEntry.isDirectory()) { + destEntry.mkdirs(); + } else { + FileOutputStream fout = new FileOutputStream(destEntry); + try { + tin.copyEntryContents(fout); + } finally { + fout.close(); + } + } + } + } + + public void exportProject(long projectId, TarOutputStream tos) throws IOException { + File dir = this.getProjectDir(projectId); + this.tarDir("", dir, tos); + } + + protected void tarDir(String relative, File dir, TarOutputStream tos) throws IOException{ + File[] files = dir.listFiles(); + for (File file : files) { + if (!file.isHidden()) { + String path = relative + file.getName(); + + if (file.isDirectory()) { + tarDir(path + File.separator, file, tos); + } else { + TarEntry entry = new TarEntry(path); + + entry.setMode(TarEntry.DEFAULT_FILE_MODE); + entry.setSize(file.length()); + entry.setModTime(file.lastModified()); + + tos.putNextEntry(entry); + + copyFile(file, tos); + + tos.closeEntry(); + } + } + } + } + + protected void copyFile(File file, 
OutputStream os) throws IOException { + final int buffersize = 4096; + + FileInputStream fis = new FileInputStream(file); + try { + byte[] buf = new byte[buffersize]; + int count; + + while((count = fis.read(buf, 0, buffersize)) != -1) { + os.write(buf, 0, count); + } + } finally { + fis.close(); + } + } + + @Override + protected void saveMetadata(ProjectMetadata metadata, long projectId) throws Exception { + File projectDir = getProjectDir(projectId); + ProjectMetadataUtilities.save(metadata, projectDir); + } + + @Override + protected void saveProject(Project project){ + ProjectUtilities.save(project); + } + + public Project loadProject(long id) { + return ProjectUtilities.load(getProjectDir(id), id); + } + + + + /** + * Save the workspace's data out to file in a safe way: save to a temporary file first + * and rename it to the real file. + */ + @Override + protected void saveWorkspace() { + synchronized (this) { + File tempFile = new File(_workspaceDir, "workspace.temp.json"); + try { + saveToFile(tempFile); + } catch (Exception e) { + e.printStackTrace(); + + logger.warn("Failed to save workspace"); + return; + } + + File file = new File(_workspaceDir, "workspace.json"); + File oldFile = new File(_workspaceDir, "workspace.old.json"); + + if (file.exists()) { + file.renameTo(oldFile); + } + + tempFile.renameTo(file); + if (oldFile.exists()) { + oldFile.delete(); + } + + logger.info("Saved workspace"); + } + } + + protected void saveToFile(File file) throws IOException, JSONException { + FileWriter writer = new FileWriter(file); + try { + JSONWriter jsonWriter = new JSONWriter(writer); + jsonWriter.object(); + jsonWriter.key("projectIDs"); + jsonWriter.array(); + for (Long id : _projectsMetadata.keySet()) { + ProjectMetadata metadata = _projectsMetadata.get(id); + if (metadata != null) { + jsonWriter.value(id); + + try { + ProjectMetadataUtilities.save(metadata, getProjectDir(id)); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + 
jsonWriter.endArray(); + writer.write('\n'); + + jsonWriter.key("preferences"); + _preferenceStore.write(jsonWriter, new Properties()); + + jsonWriter.endObject(); + } finally { + writer.close(); + } + } + + + + public void deleteProject(long projectID) { + synchronized (this) { + removeProject(projectID); + + File dir = getProjectDir(projectID); + if (dir.exists()) { + deleteDir(dir); + } + } + + saveWorkspace(); + } + + static protected void deleteDir(File dir) { + for (File file : dir.listFiles()) { + if (file.isDirectory()) { + deleteDir(file); + } else { + file.delete(); + } + } + dir.delete(); + } + + protected void load() { + if (loadFromFile(new File(_workspaceDir, "workspace.json"))) return; + if (loadFromFile(new File(_workspaceDir, "workspace.temp.json"))) return; + if (loadFromFile(new File(_workspaceDir, "workspace.old.json"))) return; + } + + protected boolean loadFromFile(File file) { + logger.info("Loading workspace: {}", file.getAbsolutePath()); + + _projectsMetadata.clear(); + + boolean found = false; + + if (file.exists() || file.canRead()) { + FileReader reader = null; + try { + reader = new FileReader(file); + JSONTokener tokener = new JSONTokener(reader); + JSONObject obj = (JSONObject) tokener.nextValue(); + + JSONArray a = obj.getJSONArray("projectIDs"); + int count = a.length(); + for (int i = 0; i < count; i++) { + long id = a.getLong(i); + + File projectDir = getProjectDir(id); + ProjectMetadata metadata = ProjectMetadataUtilities.load(projectDir); + + _projectsMetadata.put(id, metadata); + } + + if (obj.has("preferences") && !obj.isNull("preferences")) { + _preferenceStore.load(obj.getJSONObject("preferences")); + } + + if (obj.has("expressions") && !obj.isNull("expressions")) { // backward compatibility + ((TopList) _preferenceStore.get("scripting.expressions")) + .load(obj.getJSONArray("expressions")); + } + + found = true; + } catch (JSONException e) { + logger.warn("Error reading file", e); + } catch (IOException e) { + 
logger.warn("Error reading file", e); + } finally { + try { + reader.close(); + } catch (IOException e) { + logger.warn("Exception closing file",e); + } + } + } + + return found; + } + + protected void recover() { + for (File file : _workspaceDir.listFiles()) { + if (file.isDirectory() && !file.isHidden()) { + String name = file.getName(); + if (file.getName().endsWith(s_projectDirNameSuffix)) { + String idString = name.substring(0, name.length() - s_projectDirNameSuffix.length()); + long id = -1; + try { + id = Long.parseLong(idString); + } catch (NumberFormatException e) { + // ignore + } + + if (id > 0 && !_projectsMetadata.containsKey(id)) { + if (loadProjectMetadata(id)) { + logger.info( + "Recovered project named " + + getProjectMetadata(id).getName() + + " in directory " + name); + } else { + logger.warn("Failed to recover project in directory " + name); + } + } + } + } + } + } + + public HistoryEntryManager getHistoryEntryManager(){ + return new FileHistoryEntryManager(); + } +} diff --git a/main/src/com/google/refine/io/ProjectMetadataUtilities.java b/main/src/com/google/refine/io/ProjectMetadataUtilities.java new file mode 100644 index 000000000..4bf2f1fc5 --- /dev/null +++ b/main/src/com/google/refine/io/ProjectMetadataUtilities.java @@ -0,0 +1,85 @@ +package com.google.refine.io; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.OutputStreamWriter; +import java.io.Writer; + +import org.json.JSONObject; +import org.json.JSONTokener; +import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.ProjectMetadata; + + +public class ProjectMetadataUtilities { + final static Logger logger = LoggerFactory.getLogger("project_metadata_utilities"); + + public static void save(ProjectMetadata projectMeta, File projectDir) throws Exception { + File tempFile = new File(projectDir, "metadata.temp.json"); + try { + saveToFile(projectMeta, tempFile); + } catch 
(Exception e) { + e.printStackTrace(); + + logger.warn("Failed to save project metadata"); + return; + } + + File file = new File(projectDir, "metadata.json"); + File oldFile = new File(projectDir, "metadata.old.json"); + + if (file.exists()) { + file.renameTo(oldFile); + } + + tempFile.renameTo(file); + if (oldFile.exists()) { + oldFile.delete(); + } + } + + protected static void saveToFile(ProjectMetadata projectMeta, File metadataFile) throws Exception { + Writer writer = new OutputStreamWriter(new FileOutputStream(metadataFile)); + try { + JSONWriter jsonWriter = new JSONWriter(writer); + projectMeta.write(jsonWriter); + } finally { + writer.close(); + } + } + + static public ProjectMetadata load(File projectDir) { + try { + return loadFromFile(new File(projectDir, "metadata.json")); + } catch (Exception e) { + } + + try { + return loadFromFile(new File(projectDir, "metadata.temp.json")); + } catch (Exception e) { + } + + try { + return loadFromFile(new File(projectDir, "metadata.old.json")); + } catch (Exception e) { + } + + return null; + } + + static protected ProjectMetadata loadFromFile(File metadataFile) throws Exception { + FileReader reader = new FileReader(metadataFile); + try { + JSONTokener tokener = new JSONTokener(reader); + JSONObject obj = (JSONObject) tokener.nextValue(); + + return ProjectMetadata.loadFromJSON(obj); + } finally { + reader.close(); + } + } +} diff --git a/main/src/com/google/refine/io/ProjectUtilities.java b/main/src/com/google/refine/io/ProjectUtilities.java new file mode 100644 index 000000000..9742d0fc2 --- /dev/null +++ b/main/src/com/google/refine/io/ProjectUtilities.java @@ -0,0 +1,134 @@ +package com.google.refine.io; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.InputStreamReader; +import java.io.LineNumberReader; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import java.util.zip.ZipOutputStream; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
com.google.refine.ProjectManager; +import com.google.refine.model.Project; +import com.google.refine.util.Pool; + + +public class ProjectUtilities { + final static Logger logger = LoggerFactory.getLogger("project_utilities"); + + synchronized public static void save(Project project) { + synchronized (project) { + long id = project.id; + File dir = ((FileProjectManager)ProjectManager.singleton).getProjectDir(id); + + File tempFile = new File(dir, "data.temp.zip"); + try { + saveToFile(project, tempFile); + } catch (Exception e) { + e.printStackTrace(); + + logger.warn("Failed to save project {}", id); + return; + } + + File file = new File(dir, "data.zip"); + File oldFile = new File(dir, "data.old.zip"); + + if (file.exists()) { + file.renameTo(oldFile); + } + + tempFile.renameTo(file); + if (oldFile.exists()) { + oldFile.delete(); + } + + project.setLastSave(); + + logger.info("Saved project '{}'",id); + } + } + + protected static void saveToFile(Project project, File file) throws Exception { + ZipOutputStream out = new ZipOutputStream(new FileOutputStream(file)); + try { + Pool pool = new Pool(); + + out.putNextEntry(new ZipEntry("data.txt")); + try { + project.saveToOutputStream(out, pool); + } finally { + out.closeEntry(); + } + + out.putNextEntry(new ZipEntry("pool.txt")); + try { + pool.save(out); + } finally { + out.closeEntry(); + } + } finally { + out.close(); + } + } + + static public Project load(File dir, long id) { + try { + File file = new File(dir, "data.zip"); + if (file.exists()) { + return loadFromFile(file, id); + } + } catch (Exception e) { + e.printStackTrace(); + } + + try { + File file = new File(dir, "data.temp.zip"); + if (file.exists()) { + return loadFromFile(file, id); + } + } catch (Exception e) { + e.printStackTrace(); + } + + try { + File file = new File(dir, "data.old.zip"); + if (file.exists()) { + return loadFromFile(file, id); + } + } catch (Exception e) { + e.printStackTrace(); + } + + return null; + } + + static protected Project 
loadFromFile( + File file, + long id + ) throws Exception { + ZipFile zipFile = new ZipFile(file); + try { + Pool pool = new Pool(); + ZipEntry poolEntry = zipFile.getEntry("pool.txt"); + if (poolEntry != null) { + pool.load(new InputStreamReader( + zipFile.getInputStream(poolEntry))); + } // else, it's a legacy project file + + return Project.loadFromReader( + new LineNumberReader( + new InputStreamReader( + zipFile.getInputStream( + zipFile.getEntry("data.txt")))), + id, + pool + ); + } finally { + zipFile.close(); + } + } +} diff --git a/main/src/com/google/refine/logging/IndentingLayout.java b/main/src/com/google/refine/logging/IndentingLayout.java new file mode 100644 index 000000000..93091e5b2 --- /dev/null +++ b/main/src/com/google/refine/logging/IndentingLayout.java @@ -0,0 +1,143 @@ +package com.google.refine.logging; + +/* + * Copyright (c) Massachusetts Institute of Technology, 2007 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Original code: http://simile.mit.edu/repository/tracer/trunk/ + */ + +import java.util.Calendar; +import java.util.Date; + +import org.apache.log4j.Layout; +import org.apache.log4j.spi.LoggingEvent; + +/** + * This is a special Log4j log formatter that is capable of reacting on special log messages + * and 'indent' the logs accordingly. This is very useful to visually inspect a debug log + * and see what calls what. An example of logs are "> method()" and "< method()" where > and < + * are used to indicate respectively "entering" and "exiting". 
+ */ +public class IndentingLayout extends Layout { + + protected static final int CONTEXT_SIZE = 25; + protected static final long MAX_DELTA = 10000; + + protected Calendar calendar = Calendar.getInstance(); + protected long previousTime = 0; + protected int indentation = 0; + + public void activateOptions() { + // no options at this time + } + + public String format(LoggingEvent event) { + String message = event.getRenderedMessage(); + if (message == null) return ""; + if (message.length() < 2) return message; + + char leader = message.charAt(0); + char secondLeader = message.charAt(1); + if ((leader == '<') && (secondLeader == ' ') && (this.indentation > 0)) this.indentation--; + + // Reset buf + StringBuffer buf = new StringBuffer(256); + + Date date = new Date(); + long now = date.getTime(); + calendar.setTime(date); + + long delta = 0; + if (previousTime > 0) { + delta = now - previousTime; + } + previousTime = now; + +// if ((previousTime == 0) || (delta > MAX_DELTA)) { +// buf.append('\n'); +// indentation = 0; // reset indentation after a while, as we might +// // have runaway/unmatched log entries +// } + + int hour = calendar.get(Calendar.HOUR_OF_DAY); + if (hour < 10) buf.append('0'); + buf.append(hour); + buf.append(':'); + + int mins = calendar.get(Calendar.MINUTE); + if (mins < 10) buf.append('0'); + buf.append(mins); + buf.append(':'); + + int secs = calendar.get(Calendar.SECOND); + if (secs < 10) buf.append('0'); + buf.append(secs); + buf.append('.'); + + int millis = (int) (now % 1000); + if (millis < 100) buf.append('0'); + if (millis < 10) buf.append('0'); + buf.append(millis); + + buf.append(" ["); + String context = ((String) event.getMDC("LogEvent")); + if (context == null) { + context = event.getLoggerName(); + } + if (context.length() < CONTEXT_SIZE) { + pad(buf, CONTEXT_SIZE - context.length(), ' '); + buf.append(context); + } else { + buf.append(".."); + buf.append(context.substring(context.length() - CONTEXT_SIZE + 2)); + } + 
buf.append("] "); + + pad(buf, indentation, ' '); + + buf.append(message); + + buf.append(" ("); + buf.append(delta); + buf.append("ms)\n"); + + if ((leader == '>') && (secondLeader == ' ')) indentation++; + + return buf.toString(); + } + + private void pad(StringBuffer buffer, int pads, char padchar) { + for (int i = 0; i < pads; i++) { + buffer.append(padchar); + } + } + + public boolean ignoresThrowable() { + return true; + } +} diff --git a/main/src/com/google/refine/model/AbstractOperation.java b/main/src/com/google/refine/model/AbstractOperation.java new file mode 100644 index 000000000..025e9a37f --- /dev/null +++ b/main/src/com/google/refine/model/AbstractOperation.java @@ -0,0 +1,31 @@ +package com.google.refine.model; + +import java.util.Properties; + +import com.google.refine.Jsonizable; +import com.google.refine.history.HistoryEntry; +import com.google.refine.process.Process; +import com.google.refine.process.QuickHistoryEntryProcess; + +/* + * An abstract operation can be applied to different but similar + * projects. 
+ */ +abstract public class AbstractOperation implements Jsonizable { + public Process createProcess(Project project, Properties options) throws Exception { + return new QuickHistoryEntryProcess(project, getBriefDescription(null)) { + @Override + protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception { + return AbstractOperation.this.createHistoryEntry(_project, historyEntryID); + } + }; + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + throw new UnsupportedOperationException(); + } + + protected String getBriefDescription(Project project) { + throw new UnsupportedOperationException(); + } +} diff --git a/main/src/com/google/refine/model/Cell.java b/main/src/com/google/refine/model/Cell.java new file mode 100644 index 000000000..cb5413d5e --- /dev/null +++ b/main/src/com/google/refine/model/Cell.java @@ -0,0 +1,149 @@ +package com.google.refine.model; + +import java.io.Serializable; +import java.io.Writer; +import java.util.Calendar; +import java.util.Date; +import java.util.Properties; + +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonParser; +import org.codehaus.jackson.JsonToken; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.expr.EvalError; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.expr.HasFields; +import com.google.refine.util.ParsingUtilities; +import com.google.refine.util.Pool; + +public class Cell implements HasFields, Jsonizable { + final public Serializable value; + final public Recon recon; + + public Cell(Serializable value, Recon recon) { + this.value = value; + this.recon = recon; + } + + public Object getField(String name, Properties bindings) { + if ("value".equals(name)) { + return value; + } else if ("recon".equals(name)) { + return recon; + } + return null; + } + + public boolean fieldAlsoHasFields(String name) { + return 
"recon".equals(name); + } + + public void write(JSONWriter writer, Properties options) throws JSONException { + writer.object(); + if (ExpressionUtils.isError(value)) { + writer.key("e"); + writer.value(((EvalError) value).message); + } else { + writer.key("v"); + if (value != null) { + if (value instanceof Calendar) { + writer.value(ParsingUtilities.dateToString(((Calendar) value).getTime())); + writer.key("t"); writer.value("date"); + } else if (value instanceof Date) { + writer.value(ParsingUtilities.dateToString((Date) value)); + writer.key("t"); writer.value("date"); + } else { + writer.value(value); + } + } else { + writer.value(null); + } + } + + if (recon != null) { + writer.key("r"); + writer.value(Long.toString(recon.id)); + + Pool pool = (Pool) options.get("pool"); + pool.pool(recon); + } + writer.endObject(); + } + + public void save(Writer writer, Properties options) { + JSONWriter jsonWriter = new JSONWriter(writer); + try { + write(jsonWriter, options); + } catch (JSONException e) { + e.printStackTrace(); + } + } + + static public Cell loadStreaming(String s, Pool pool) throws Exception { + JsonFactory jsonFactory = new JsonFactory(); + JsonParser jp = jsonFactory.createJsonParser(s); + + if (jp.nextToken() != JsonToken.START_OBJECT) { + return null; + } + + return loadStreaming(jp, pool); + } + + static public Cell loadStreaming(JsonParser jp, Pool pool) throws Exception { + JsonToken t = jp.getCurrentToken(); + if (t == JsonToken.VALUE_NULL || t != JsonToken.START_OBJECT) { + return null; + } + + Serializable value = null; + String type = null; + Recon recon = null; + + while (jp.nextToken() != JsonToken.END_OBJECT) { + String fieldName = jp.getCurrentName(); + jp.nextToken(); + + if ("r".equals(fieldName)) { + if (jp.getCurrentToken() == JsonToken.VALUE_STRING) { + String reconID = jp.getText(); + + recon = pool.getRecon(reconID); + } else { + // legacy + recon = Recon.loadStreaming(jp, pool); + } + } else if ("e".equals(fieldName)) { + value = 
new EvalError(jp.getText()); + } else if ("v".equals(fieldName)) { + JsonToken token = jp.getCurrentToken(); + + if (token == JsonToken.VALUE_STRING) { + value = jp.getText(); + } else if (token == JsonToken.VALUE_NUMBER_INT) { + value = jp.getLongValue(); + } else if (token == JsonToken.VALUE_NUMBER_FLOAT) { + value = jp.getDoubleValue(); + } else if (token == JsonToken.VALUE_TRUE) { + value = true; + } else if (token == JsonToken.VALUE_FALSE) { + value = false; + } + } else if ("t".equals(fieldName)) { + type = jp.getText(); + } + } + + if (value != null) { + if (type != null && "date".equals(type)) { + value = ParsingUtilities.stringToDate((String) value); + } + return new Cell(value, recon); + } else { + return null; + } + } +} diff --git a/main/src/com/google/refine/model/Column.java b/main/src/com/google/refine/model/Column.java new file mode 100644 index 000000000..42d4f7cb8 --- /dev/null +++ b/main/src/com/google/refine/model/Column.java @@ -0,0 +1,123 @@ +package com.google.refine.model; + +import java.io.Writer; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.model.recon.ReconConfig; +import com.google.refine.util.ParsingUtilities; + +public class Column implements Jsonizable { + final private int _cellIndex; + final private String _originalName; + private String _name; + private ReconConfig _reconConfig; + private ReconStats _reconStats; + + transient protected Map _precomputes; + + public Column(int cellIndex, String originalName) { + _cellIndex = cellIndex; + _originalName = _name = originalName; + } + + public int getCellIndex() { + return _cellIndex; + } + + public String getOriginalHeaderLabel() { + return _originalName; + } + + public void setName(String name) { + this._name = name; + } + + public String getName() { + return _name; + } + + public void 
setReconConfig(ReconConfig config) { + this._reconConfig = config; + } + + public ReconConfig getReconConfig() { + return _reconConfig; + } + + public void setReconStats(ReconStats stats) { + this._reconStats = stats; + } + + public ReconStats getReconStats() { + return _reconStats; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("cellIndex"); writer.value(_cellIndex); + writer.key("originalName"); writer.value(_originalName); + writer.key("name"); writer.value(_name); + if (_reconConfig != null) { + writer.key("reconConfig"); + _reconConfig.write(writer, options); + } + if (_reconStats != null) { + writer.key("reconStats"); + _reconStats.write(writer, options); + } + writer.endObject(); + } + + public void clearPrecomputes() { + if (_precomputes != null) { + _precomputes.clear(); + } + } + + public Object getPrecompute(String key) { + if (_precomputes != null) { + return _precomputes.get(key); + } + return null; + } + + public void setPrecompute(String key, Object value) { + if (_precomputes == null) { + _precomputes = new HashMap(); + } + _precomputes.put(key, value); + } + + public void save(Writer writer) { + JSONWriter jsonWriter = new JSONWriter(writer); + try { + write(jsonWriter, new Properties()); + } catch (JSONException e) { + e.printStackTrace(); + } + } + + static public Column load(String s) throws Exception { + JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(s); + Column column = new Column(obj.getInt("cellIndex"), obj.getString("originalName")); + + column._name = obj.getString("name"); + if (obj.has("reconConfig")) { + column._reconConfig = ReconConfig.reconstruct(obj.getJSONObject("reconConfig")); + } + if (obj.has("reconStats")) { + column._reconStats = ReconStats.load(obj.getJSONObject("reconStats")); + } + + return column; + } +} diff --git a/main/src/com/google/refine/model/ColumnGroup.java b/main/src/com/google/refine/model/ColumnGroup.java new file mode 
100644 index 000000000..fee74effc --- /dev/null +++ b/main/src/com/google/refine/model/ColumnGroup.java @@ -0,0 +1,76 @@ +package com.google.refine.model; + +import java.io.Writer; +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.util.ParsingUtilities; + +public class ColumnGroup implements Jsonizable { + final public int startColumnIndex; + final public int columnSpan; + final public int keyColumnIndex; // could be -1 if there is no key cell + + transient public ColumnGroup parentGroup; + transient public List subgroups; + + public ColumnGroup(int startColumnIndex, int columnSpan, int keyColumnIndex) { + this.startColumnIndex = startColumnIndex; + this.columnSpan = columnSpan; + this.keyColumnIndex = keyColumnIndex; + internalInitialize(); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + + writer.key("startColumnIndex"); writer.value(startColumnIndex); + writer.key("columnSpan"); writer.value(columnSpan); + writer.key("keyColumnIndex"); writer.value(keyColumnIndex); + + if (!"save".equals(options.get("mode")) && (subgroups != null) && (subgroups.size() > 0)) { + writer.key("subgroups"); writer.array(); + for (ColumnGroup g : subgroups) { + g.write(writer, options); + } + writer.endArray(); + } + writer.endObject(); + } + + public boolean contains(ColumnGroup g) { + return (g.startColumnIndex >= startColumnIndex && + g.startColumnIndex < startColumnIndex + columnSpan); + } + + public void save(Writer writer) { + JSONWriter jsonWriter = new JSONWriter(writer); + try { + write(jsonWriter, new Properties()); + } catch (JSONException e) { + e.printStackTrace(); + } + } + + static public ColumnGroup load(String s) throws Exception { + JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(s); + + return new 
ColumnGroup( + obj.getInt("startColumnIndex"), + obj.getInt("columnSpan"), + obj.getInt("keyColumnIndex") + ); + } + + protected void internalInitialize() { + subgroups = new LinkedList(); + } +} diff --git a/main/src/com/google/refine/model/ColumnModel.java b/main/src/com/google/refine/model/ColumnModel.java new file mode 100644 index 000000000..405e17636 --- /dev/null +++ b/main/src/com/google/refine/model/ColumnModel.java @@ -0,0 +1,244 @@ +package com.google.refine.model; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; + +public class ColumnModel implements Jsonizable { + final public List columns = new LinkedList(); + final public List columnGroups = new LinkedList(); + + private int _maxCellIndex; + private int _keyColumnIndex; + + transient protected Map _nameToColumn; + transient protected Map _cellIndexToColumn; + transient protected List _rootColumnGroups; + transient protected List _columnNames; + + public ColumnModel() { + internalInitialize(); + } + + synchronized public void setMaxCellIndex(int maxCellIndex) { + this._maxCellIndex = Math.max(this._maxCellIndex, maxCellIndex); + } + + public int getMaxCellIndex() { + return _maxCellIndex; + } + + synchronized public int allocateNewCellIndex() { + return ++_maxCellIndex; + } + + public void setKeyColumnIndex(int keyColumnIndex) { + // TODO: check validity of new cell index, e.g., it's not in any group + this._keyColumnIndex = keyColumnIndex; + } + + public int getKeyColumnIndex() { + return _keyColumnIndex; + } + + synchronized public void addColumnGroup(int startColumnIndex, int span, int keyColumnIndex) { + for (ColumnGroup g : 
columnGroups) { + if (g.startColumnIndex == startColumnIndex && g.columnSpan == span) { + if (g.keyColumnIndex == keyColumnIndex) { + return; + } else { + columnGroups.remove(g); + break; + } + } + } + + ColumnGroup cg = new ColumnGroup(startColumnIndex, span, keyColumnIndex); + + columnGroups.add(cg); + + } + + public void update() { + internalInitialize(); + } + + synchronized public void addColumn(int index, Column column, boolean avoidNameCollision) throws ModelException { + String baseName = column.getName(); + + if (_nameToColumn.containsKey(baseName)) { + if (!avoidNameCollision) { + throw new ModelException("Duplicated column name"); + } + } + + String name = baseName; + int i = 1; + while (true) { + if (_nameToColumn.containsKey(name)) { + i++; + name = baseName + i; + } else { + break; + } + } + + column.setName(name); + columns.add(index < 0 ? columns.size() : index, column); + _nameToColumn.put(name, column); // so the next call can check + } + + synchronized public Column getColumnByName(String name) { + return _nameToColumn.get(name); + } + + synchronized public int getColumnIndexByName(String name) { + for (int i = 0; i < _columnNames.size(); i++) { + String s = _columnNames.get(i); + if (name.equals(s)) { + return i; + } + } + return -1; + } + + synchronized public Column getColumnByCellIndex(int cellIndex) { + return _cellIndexToColumn.get(cellIndex); + } + + synchronized public List getColumnNames() { + return _columnNames; + } + + synchronized public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + + writer.key("columns"); + writer.array(); + for (Column column : columns) { + column.write(writer, options); + } + writer.endArray(); + + if (columns.size() > 0) { + writer.key("keyCellIndex"); writer.value(getKeyColumnIndex()); + writer.key("keyColumnName"); writer.value(columns.get(_keyColumnIndex).getName()); + } + + writer.key("columnGroups"); + writer.array(); + for (ColumnGroup g : 
_rootColumnGroups) { + g.write(writer, options); + } + writer.endArray(); + + writer.endObject(); + } + + synchronized public void save(Writer writer, Properties options) throws IOException { + writer.write("maxCellIndex="); writer.write(Integer.toString(_maxCellIndex)); writer.write('\n'); + writer.write("keyColumnIndex="); writer.write(Integer.toString(_keyColumnIndex)); writer.write('\n'); + + writer.write("columnCount="); writer.write(Integer.toString(columns.size())); writer.write('\n'); + for (Column column : columns) { + column.save(writer); writer.write('\n'); + } + + writer.write("columnGroupCount="); writer.write(Integer.toString(columnGroups.size())); writer.write('\n'); + for (ColumnGroup group : columnGroups) { + group.save(writer); writer.write('\n'); + } + + writer.write("/e/\n"); + } + + synchronized public void load(LineNumberReader reader) throws Exception { + String line; + while ((line = reader.readLine()) != null && !"/e/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("maxCellIndex".equals(field)) { + _maxCellIndex = Integer.parseInt(value); + } else if ("keyColumnIndex".equals(field)) { + _keyColumnIndex = Integer.parseInt(value); + } else if ("columnCount".equals(field)) { + int count = Integer.parseInt(value); + + for (int i = 0; i < count; i++) { + columns.add(Column.load(reader.readLine())); + } + } else if ("columnGroupCount".equals(field)) { + int count = Integer.parseInt(value); + + for (int i = 0; i < count; i++) { + columnGroups.add(ColumnGroup.load(reader.readLine())); + } + } + } + + internalInitialize(); + } + + synchronized protected void internalInitialize() { + generateMaps(); + + // Turn the flat list of column groups into a tree + + _rootColumnGroups = new LinkedList(columnGroups); + Collections.sort(_rootColumnGroups, new Comparator() { + public int compare(ColumnGroup o1, ColumnGroup o2) { + int firstDiff = 
/**
 * Checked exception reporting an invalid change to the data model, for example adding
 * a column whose name is already in use.
 *
 * <p>Each constructor mirrors the {@link Exception} constructor with the same arguments.
 */
public class ModelException extends Exception {
    private static final long serialVersionUID = -168448967638065467L;

    /** Creates an exception with neither message nor cause. */
    public ModelException() {
        super();
    }

    /**
     * @param message description of what went wrong
     */
    public ModelException(String message) {
        super(message);
    }

    /**
     * @param cause the underlying failure
     */
    public ModelException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message description of what went wrong
     * @param cause   the underlying failure
     */
    public ModelException(String message, Throwable cause) {
        super(message, cause);
    }
}
100644 index 000000000..f06192000 --- /dev/null +++ b/main/src/com/google/refine/model/OverlayModel.java @@ -0,0 +1,11 @@ +package com.google.refine.model; + +import com.google.refine.Jsonizable; + +public interface OverlayModel extends Jsonizable { + public void onBeforeSave(); + + public void onAfterSave(); + + public void dispose(); +} diff --git a/main/src/com/google/refine/model/Project.java b/main/src/com/google/refine/model/Project.java new file mode 100644 index 000000000..a38c58d2d --- /dev/null +++ b/main/src/com/google/refine/model/Project.java @@ -0,0 +1,230 @@ +package com.google.refine.model; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.GridworksServlet; +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.history.History; +import com.google.refine.process.ProcessManager; +import com.google.refine.protograph.Protograph; +import com.google.refine.util.ParsingUtilities; +import com.google.refine.util.Pool; + +public class Project { + final static protected Map> + s_overlayModelClasses = new HashMap>(); + + static public void registerOverlayModel(String modelName, Class klass) { + s_overlayModelClasses.put(modelName, klass); + } + + static { + registerOverlayModel("freebaseProtograph", Protograph.class); + } + + final public long id; + final public List rows = new ArrayList(); + + final public ColumnModel columnModel = new ColumnModel(); + final public RecordModel recordModel = new RecordModel(); + final public Map 
overlayModels = new HashMap(); + + final public History history; + + transient public ProcessManager processManager = new ProcessManager(); + transient private Date _lastSave = new Date(); + + final static Logger logger = LoggerFactory.getLogger("project"); + + static public long generateID() { + return System.currentTimeMillis() + Math.round(Math.random() * 1000000000000L); + } + + public Project() { + id = generateID(); + history = new History(this); + } + + protected Project(long id) { + this.id = id; + this.history = new History(this); + } + + public void dispose() { + for (OverlayModel overlayModel : overlayModels.values()) { + try { + overlayModel.dispose(); + } catch (Exception e) { + logger.warn("Error signaling overlay model before disposing", e); + } + } + } + + public Date getLastSave(){ + return this._lastSave; + } + /** + * Sets the lastSave time to now + */ + public void setLastSave(){ + this._lastSave = new Date(); + } + + public ProjectMetadata getMetadata() { + return ProjectManager.singleton.getProjectMetadata(id); + } + + public void saveToOutputStream(OutputStream out, Pool pool) throws IOException { + for (OverlayModel overlayModel : overlayModels.values()) { + try { + overlayModel.onBeforeSave(); + } catch (Exception e) { + logger.warn("Error signaling overlay model before saving", e); + } + } + + Writer writer = new OutputStreamWriter(out); + try { + Properties options = new Properties(); + options.setProperty("mode", "save"); + options.put("pool", pool); + + saveToWriter(writer, options); + } finally { + writer.flush(); + } + + for (OverlayModel overlayModel : overlayModels.values()) { + try { + overlayModel.onAfterSave(); + } catch (Exception e) { + logger.warn("Error signaling overlay model after saving", e); + } + } + } + + protected void saveToWriter(Writer writer, Properties options) throws IOException { + writer.write(GridworksServlet.getVersion()); writer.write('\n'); + + writer.write("columnModel=\n"); columnModel.save(writer, 
options); + writer.write("history=\n"); history.save(writer, options); + + for (String modelName : overlayModels.keySet()) { + writer.write("overlayModel:"); + writer.write(modelName); + writer.write("="); + + try { + JSONWriter jsonWriter = new JSONWriter(writer); + + overlayModels.get(modelName).write(jsonWriter, options); + } catch (JSONException e) { + e.printStackTrace(); + } + writer.write('\n'); + } + + writer.write("rowCount="); writer.write(Integer.toString(rows.size())); writer.write('\n'); + for (Row row : rows) { + row.save(writer, options); writer.write('\n'); + } + } + + static public Project loadFromReader( + LineNumberReader reader, + long id, + Pool pool + ) throws Exception { + long start = System.currentTimeMillis(); + + /* String version = */ reader.readLine(); + + Project project = new Project(id); + int maxCellCount = 0; + + String line; + while ((line = reader.readLine()) != null) { + int equal = line.indexOf('='); + String field = line.substring(0, equal); + String value = line.substring(equal + 1); + + // backward compatibility + if ("protograph".equals(field)) { + field = "overlayModel:freebaseProtograph"; + } + + if ("columnModel".equals(field)) { + project.columnModel.load(reader); + } else if ("history".equals(field)) { + project.history.load(project, reader); + } else if ("rowCount".equals(field)) { + int count = Integer.parseInt(value); + + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + Row row = Row.load(line, pool); + project.rows.add(row); + maxCellCount = Math.max(maxCellCount, row.cells.size()); + } + } + } else if (field.startsWith("overlayModel:")) { + String modelName = field.substring("overlayModel:".length()); + if (s_overlayModelClasses.containsKey(modelName)) { + Class klass = s_overlayModelClasses.get(modelName); + + try { + Method loadMethod = klass.getMethod("load", Project.class, JSONObject.class); + JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(value); + + 
OverlayModel overlayModel = (OverlayModel) loadMethod.invoke(null, project, obj); + + project.overlayModels.put(modelName, overlayModel); + } catch (Exception e) { + logger.error("Failed to load overlay model " + modelName); + } + } + } + } + + project.columnModel.setMaxCellIndex(maxCellCount - 1); + + logger.info( + "Loaded project {} from disk in {} sec(s)",id,Long.toString((System.currentTimeMillis() - start) / 1000) + ); + + project.update(); + + return project; + } + + public void update() { + columnModel.update(); + recordModel.update(this); + } + + + //wrapper of processManager variable to allow unit testing + //TODO make the processManager variable private, and force all calls through this method + public ProcessManager getProcessManager() { + return this.processManager; + } +} diff --git a/main/src/com/google/refine/model/Recon.java b/main/src/com/google/refine/model/Recon.java new file mode 100644 index 000000000..831fbeffd --- /dev/null +++ b/main/src/com/google/refine/model/Recon.java @@ -0,0 +1,376 @@ +package com.google.refine.model; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonParser; +import org.codehaus.jackson.JsonToken; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.expr.HasFields; +import com.google.refine.util.Pool; + +public class Recon implements HasFields, Jsonizable { + + static public enum Judgment { + None, + Matched, + New + } + + static public String judgmentToString(Judgment judgment) { + if (judgment == Judgment.Matched) { + return "matched"; + } else if (judgment == Judgment.New) { + return "new"; + } else { + return "none"; + } + } + + static public Judgment stringToJudgment(String s) { + if ("matched".equals(s)) { + return Judgment.Matched; + } else if ("new".equals(s)) { + return 
Judgment.New; + } else { + return Judgment.None; + } + } + + static final public int Feature_typeMatch = 0; + static final public int Feature_nameMatch = 1; + static final public int Feature_nameLevenshtein = 2; + static final public int Feature_nameWordDistance = 3; + static final public int Feature_qaResult = 4; + static final public int Feature_max = 5; + + static final protected Map s_featureMap = new HashMap(); + static { + s_featureMap.put("typeMatch", Feature_typeMatch); + s_featureMap.put("nameMatch", Feature_nameMatch); + s_featureMap.put("nameLevenshtein", Feature_nameLevenshtein); + s_featureMap.put("nameWordDistance", Feature_nameWordDistance); + s_featureMap.put("qaResult", Feature_qaResult); + } + + final public long id; + public String service = "unknown"; + public String identifierSpace = null; + public String schemaSpace = null; + + public Object[] features = new Object[Feature_max]; + public List candidates; + + public Judgment judgment = Judgment.None; + public String judgmentAction = "unknown"; + public long judgmentHistoryEntry; + public int judgmentBatchSize = 0; + + public ReconCandidate match = null; + public int matchRank = -1; + + static public Recon makeFreebaseRecon(long judgmentHistoryEntry) { + return new Recon( + judgmentHistoryEntry, + "http://rdf.freebase.com/ns/type.object.id", + "http://rdf.freebase.com/ns/type.object.id"); + } + + public Recon(long judgmentHistoryEntry, String identifierSpace, String schemaSpace) { + id = System.currentTimeMillis() * 1000000 + Math.round(Math.random() * 1000000); + this.judgmentHistoryEntry = judgmentHistoryEntry; + this.identifierSpace = identifierSpace; + this.schemaSpace = schemaSpace; + } + + protected Recon(long id, long judgmentHistoryEntry) { + this.id = id; + this.judgmentHistoryEntry = judgmentHistoryEntry; + } + + public Recon dup() { + Recon r = new Recon(id, judgmentHistoryEntry); + r.identifierSpace = identifierSpace; + r.schemaSpace = schemaSpace; + + copyTo(r); + + return r; + } + 
+ public Recon dup(long judgmentHistoryEntry) { + Recon r = new Recon(judgmentHistoryEntry, identifierSpace, schemaSpace); + + copyTo(r); + + return r; + } + + protected void copyTo(Recon r) { + System.arraycopy(features, 0, r.features, 0, features.length); + + if (candidates != null) { + r.candidates = new ArrayList(candidates); + } + + r.service = service; + + r.judgment = judgment; + + r.judgmentAction = judgmentAction; + r.judgmentBatchSize = judgmentBatchSize; + + r.match = match; + r.matchRank = matchRank; + } + + public void addCandidate(ReconCandidate candidate) { + if (candidates == null) { + candidates = new ArrayList(3); + } + candidates.add(candidate); + } + + public ReconCandidate getBestCandidate() { + if (candidates != null && candidates.size() > 0) { + return candidates.get(0); + } + return null; + } + + public Object getFeature(int feature) { + return feature < features.length ? features[feature] : null; + } + + public void setFeature(int feature, Object v) { + if (feature >= features.length) { + if (feature >= Feature_max) { + return; + } + + // We deserialized this object from an older version of the class + // that had fewer features, so we can just try to extend it + + Object[] newFeatures = new Object[Feature_max]; + + System.arraycopy(features, 0, newFeatures, 0, features.length); + + features = newFeatures; + } + + features[feature] = v; + } + + public Object getField(String name, Properties bindings) { + if ("id".equals(name)) { + return id; + } else if ("best".equals(name)) { + return candidates != null && candidates.size() > 0 ? 
candidates.get(0) : null; + } else if ("candidates".equals(name)) { + return candidates; + } else if ("judgment".equals(name) || "judgement".equals(name)) { + return judgmentToString(); + } else if ("judgmentAction".equals(name) || "judgementAction".equals(name)) { + return judgmentAction; + } else if ("judgmentHistoryEntry".equals(name) || "judgementHistoryEntry".equals(name)) { + return judgmentHistoryEntry; + } else if ("judgmentBatchSize".equals(name) || "judgementBatchSize".equals(name)) { + return judgmentBatchSize; + } else if ("matched".equals(name)) { + return judgment == Judgment.Matched; + } else if ("new".equals(name)) { + return judgment == Judgment.New; + } else if ("match".equals(name)) { + return match; + } else if ("matchRank".equals(name)) { + return matchRank; + } else if ("features".equals(name)) { + return new Features(); + } else if ("service".equals(name)) { + return service; + } else if ("identifierSpace".equals(name)) { + return identifierSpace; + } else if ("schemaSpace".equals(name)) { + return schemaSpace; + } + return null; + } + + public boolean fieldAlsoHasFields(String name) { + return "match".equals(name) || "best".equals(name); + } + + protected String judgmentToString() { + return judgmentToString(judgment); + } + + public class Features implements HasFields { + public Object getField(String name, Properties bindings) { + int index = s_featureMap.get(name); + return index < features.length ? 
features[index] : null; + } + + public boolean fieldAlsoHasFields(String name) { + return false; + } + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + boolean saveMode = "save".equals(options.getProperty("mode")); + + writer.object(); + writer.key("id"); writer.value(id); + if (saveMode) { + writer.key("judgmentHistoryEntry"); writer.value(judgmentHistoryEntry); + } + + writer.key("service"); writer.value(service); + writer.key("identifierSpace"); writer.value(identifierSpace); + writer.key("schemaSpace"); writer.value(schemaSpace); + + writer.key("j"); writer.value(judgmentToString()); + if (match != null) { + writer.key("m"); + writer.value(match.id); + } + if (match == null || saveMode) { + writer.key("c"); writer.array(); + if (candidates != null) { + for (ReconCandidate c : candidates) { + writer.value(c.id); + } + } + writer.endArray(); + } + + if (saveMode) { + writer.key("f"); + writer.array(); + for (Object o : features) { + writer.value(o); + } + writer.endArray(); + + writer.key("judgmentAction"); writer.value(judgmentAction); + writer.key("judgmentBatchSize"); writer.value(judgmentBatchSize); + + if (match != null) { + writer.key("matchRank"); writer.value(matchRank); + } + } + + writer.endObject(); + } + + static public Recon loadStreaming(String s, Pool pool) throws Exception { + JsonFactory jsonFactory = new JsonFactory(); + JsonParser jp = jsonFactory.createJsonParser(s); + + if (jp.nextToken() != JsonToken.START_OBJECT) { + return null; + } + return loadStreaming(jp, pool); + } + + static public Recon loadStreaming(JsonParser jp, Pool pool) throws Exception { + JsonToken t = jp.getCurrentToken(); + if (t == JsonToken.VALUE_NULL || t != JsonToken.START_OBJECT) { + return null; + } + + Recon recon = null; + long id = -1; + long judgmentHistoryEntry = -1; + + while (jp.nextToken() != JsonToken.END_OBJECT) { + String fieldName = jp.getCurrentName(); + jp.nextToken(); + + if ("id".equals(fieldName)) { + id = 
jp.getLongValue(); + } else if ("judgmentHistoryEntry".equals(fieldName)) { + judgmentHistoryEntry = jp.getLongValue(); + if (recon != null) { + recon.judgmentHistoryEntry = judgmentHistoryEntry; + } + } else { + if (recon == null) { + recon = new Recon(id, judgmentHistoryEntry); + } + + if ("j".equals(fieldName)) { + recon.judgment = stringToJudgment(jp.getText()); + } else if ("m".equals(fieldName)) { + if (jp.getCurrentToken() == JsonToken.VALUE_STRING) { + String candidateID = jp.getText(); + + recon.match = pool.getReconCandidate(candidateID); + } else { + // legacy + recon.match = ReconCandidate.loadStreaming(jp); + } + } else if ("f".equals(fieldName)) { + if (jp.getCurrentToken() != JsonToken.START_ARRAY) { + return null; + } + + int feature = 0; + while (jp.nextToken() != JsonToken.END_ARRAY) { + if (feature < recon.features.length) { + JsonToken token = jp.getCurrentToken(); + if (token == JsonToken.VALUE_STRING) { + recon.features[feature++] = jp.getText(); + } else if (token == JsonToken.VALUE_NUMBER_INT) { + recon.features[feature++] = jp.getLongValue(); + } else if (token == JsonToken.VALUE_NUMBER_FLOAT) { + recon.features[feature++] = jp.getDoubleValue(); + } else if (token == JsonToken.VALUE_FALSE) { + recon.features[feature++] = false; + } else if (token == JsonToken.VALUE_TRUE) { + recon.features[feature++] = true; + } + } + } + } else if ("c".equals(fieldName)) { + if (jp.getCurrentToken() != JsonToken.START_ARRAY) { + return null; + } + + while (jp.nextToken() != JsonToken.END_ARRAY) { + if (jp.getCurrentToken() == JsonToken.VALUE_STRING) { + String candidateID = jp.getText(); + + recon.addCandidate(pool.getReconCandidate(candidateID)); + } else { + // legacy + recon.addCandidate(ReconCandidate.loadStreaming(jp)); + } + } + } else if ("service".equals(fieldName)) { + recon.service = jp.getText(); + } else if ("identifierSpace".equals(fieldName)) { + recon.identifierSpace = jp.getText(); + } else if ("schemaSpace".equals(fieldName)) { + 
recon.schemaSpace = jp.getText(); + } else if ("judgmentAction".equals(fieldName)) { + recon.judgmentAction = jp.getText(); + } else if ("judgmentBatchSize".equals(fieldName)) { + recon.judgmentBatchSize = jp.getIntValue(); + } else if ("matchRank".equals(fieldName)) { + recon.matchRank = jp.getIntValue(); + } + } + } + + return recon; + } +} diff --git a/main/src/com/google/refine/model/ReconCandidate.java b/main/src/com/google/refine/model/ReconCandidate.java new file mode 100644 index 000000000..bc0fc26ac --- /dev/null +++ b/main/src/com/google/refine/model/ReconCandidate.java @@ -0,0 +1,124 @@ +package com.google.refine.model; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonParser; +import org.codehaus.jackson.JsonToken; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.expr.HasFields; + +public class ReconCandidate implements HasFields, Jsonizable { + final public String id; + final public String name; + final public String[] types; + final public double score; + + public ReconCandidate(String topicID, String topicName, String[] typeIDs, double score) { + this.id = topicID; + this.name = topicName; + this.types = typeIDs; + this.score = score; + } + + public Object getField(String name, Properties bindings) { + if ("id".equals(name)) { + return id; + } else if ("name".equals(name)) { + return this.name; + } else if ("type".equals(name)) { + return types; + } else if ("score".equals(name)) { + return score; + } + return null; + } + + public boolean fieldAlsoHasFields(String name) { + return false; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(id); + writer.key("name"); writer.value(name); + writer.key("score"); writer.value(score); + + /* if 
(!options.containsKey("reconCandidateOmitTypes")) */ { + writer.key("types"); writer.array(); + for (String typeID : types) { + writer.value(typeID); + } + writer.endArray(); + } + + writer.endObject(); + } + + static public ReconCandidate loadStreaming(String s) throws Exception { + JsonFactory jsonFactory = new JsonFactory(); + JsonParser jp = jsonFactory.createJsonParser(s); + + if (jp.nextToken() != JsonToken.START_OBJECT) { + return null; + } + return loadStreaming(jp); + } + + static public ReconCandidate loadStreaming(JsonParser jp) throws Exception { + JsonToken t = jp.getCurrentToken(); + if (t == JsonToken.VALUE_NULL || t != JsonToken.START_OBJECT) { + return null; + } + + String id = null; + String name = null; + List types = null; + double score = 0; + + while (jp.nextToken() != JsonToken.END_OBJECT) { + String fieldName = jp.getCurrentName(); + jp.nextToken(); + + if ("id".equals(fieldName)) { + id = jp.getText(); + } else if ("name".equals(fieldName)) { + name = jp.getText(); + } else if ("score".equals(fieldName)) { + score = jp.getDoubleValue(); + } else if ("types".equals(fieldName)) { + if (jp.getCurrentToken() != JsonToken.START_ARRAY) { + return null; + } + + types = new ArrayList(); + + while (jp.nextToken() != JsonToken.END_ARRAY) { + types.add(jp.getText()); + } + } + } + + String[] typesA; + if (types != null) { + typesA = new String[types.size()]; + types.toArray(typesA); + } else { + typesA = new String[0]; + } + + return new ReconCandidate( + id, + name, + typesA, + score + ); + } +} \ No newline at end of file diff --git a/main/src/com/google/refine/model/ReconStats.java b/main/src/com/google/refine/model/ReconStats.java new file mode 100644 index 000000000..075f66f61 --- /dev/null +++ b/main/src/com/google/refine/model/ReconStats.java @@ -0,0 +1,74 @@ +package com.google.refine.model; + +import java.io.Writer; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + 
+import com.google.refine.Jsonizable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Recon.Judgment; + +public class ReconStats implements Jsonizable { + static public ReconStats load(JSONObject obj) throws Exception { + return new ReconStats( + obj.getInt("nonBlanks"), + obj.getInt("newTopics"), + obj.getInt("matchedTopics") + ); + } + + final public int nonBlanks; + final public int newTopics; + final public int matchedTopics; + + public ReconStats(int nonBlanks, int newTopics, int matchedTopics) { + this.nonBlanks = nonBlanks; + this.newTopics = newTopics; + this.matchedTopics = matchedTopics; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("nonBlanks"); writer.value(nonBlanks); + writer.key("newTopics"); writer.value(newTopics); + writer.key("matchedTopics"); writer.value(matchedTopics); + writer.endObject(); + } + + static public ReconStats create(Project project, int cellIndex) { + int nonBlanks = 0; + int newTopics = 0; + int matchedTopics = 0; + + for (Row row : project.rows) { + Cell cell = row.getCell(cellIndex); + if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) { + nonBlanks++; + + if (cell.recon != null) { + if (cell.recon.judgment == Judgment.New) { + newTopics++; + } else if (cell.recon.judgment == Judgment.Matched) { + matchedTopics++; + } + } + } + } + + return new ReconStats(nonBlanks, newTopics, matchedTopics); + } + + public void save(Writer writer) { + JSONWriter jsonWriter = new JSONWriter(writer); + try { + write(jsonWriter, new Properties()); + } catch (JSONException e) { + e.printStackTrace(); + } + } +} diff --git a/main/src/com/google/refine/model/Record.java b/main/src/com/google/refine/model/Record.java new file mode 100644 index 000000000..d10162dff --- /dev/null +++ b/main/src/com/google/refine/model/Record.java @@ -0,0 +1,17 @@ +package com.google.refine.model; + +public class Record { + final public int 
fromRowIndex; + final public int toRowIndex; + final public int recordIndex; + + public Record( + int fromRowIndex, + int toRowIndex, + int recordIndex + ) { + this.fromRowIndex = fromRowIndex; + this.toRowIndex = toRowIndex; + this.recordIndex = recordIndex; + } +} diff --git a/main/src/com/google/refine/model/RecordModel.java b/main/src/com/google/refine/model/RecordModel.java new file mode 100644 index 000000000..7909288e1 --- /dev/null +++ b/main/src/com/google/refine/model/RecordModel.java @@ -0,0 +1,222 @@ +package com.google.refine.model; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.expr.ExpressionUtils; + +public class RecordModel implements Jsonizable { + final static public class CellDependency { + final public int rowIndex; + final public int cellIndex; + + public CellDependency(int rowIndex, int cellIndex) { + this.rowIndex = rowIndex; + this.cellIndex = cellIndex; + } + } + + final static public class RowDependency { + public int recordIndex; + public CellDependency[] cellDependencies; + public List contextRows; + } + + protected List _rowDependencies; + protected List _records; + + public RowDependency getRowDependency(int rowIndex) { + return _rowDependencies != null && rowIndex >= 0 && rowIndex < _rowDependencies.size() ? + _rowDependencies.get(rowIndex) : null; + } + + public int getRecordCount() { + return _records.size(); + } + + public Record getRecord(int recordIndex) { + return _records != null && recordIndex >= 0 && recordIndex < _records.size() ? 
+ _records.get(recordIndex) : null; + } + + public Record getRecordOfRow(int rowIndex) { + RowDependency rd = getRowDependency(rowIndex); + if (rd != null) { + if (rd.recordIndex < 0) { + rd = getRowDependency(rd.contextRows.get(0)); + } + return getRecord(rd.recordIndex); + } + return null; + } + + synchronized public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("hasRecords"); writer.value(_records.size() < _rowDependencies.size()); + writer.endObject(); + } + + static protected class KeyedGroup { + int[] cellIndices; + int keyCellIndex; + } + + synchronized public void update(Project project) { + synchronized (project) { + List rows = project.rows; + int rowCount = rows.size(); + + ColumnModel columnModel = project.columnModel; + List keyedGroups = computeKeyedGroups(columnModel); + int groupCount = keyedGroups.size(); + + int[] lastNonBlankRowsByGroup = new int[keyedGroups.size()]; + for (int i = 0; i < lastNonBlankRowsByGroup.length; i++) { + lastNonBlankRowsByGroup[i] = -1; + } + + _rowDependencies = new ArrayList(rowCount); + + int recordIndex = 0; + for (int r = 0; r < rowCount; r++) { + Row row = rows.get(r); + RowDependency rowDependency = new RowDependency(); + + for (int g = 0; g < groupCount; g++) { + KeyedGroup group = keyedGroups.get(g); + + if (!ExpressionUtils.isNonBlankData(row.getCellValue(group.keyCellIndex))) { + int contextRowIndex = lastNonBlankRowsByGroup[g]; + if (contextRowIndex >= 0) { + for (int dependentCellIndex : group.cellIndices) { + if (ExpressionUtils.isNonBlankData(row.getCellValue(dependentCellIndex))) { + setRowDependency( + project, + rowDependency, + dependentCellIndex, + contextRowIndex, + group.keyCellIndex + ); + } + } + } + } else { + lastNonBlankRowsByGroup[g] = r; + } + } + + if (rowDependency.cellDependencies != null && rowDependency.cellDependencies.length > 0) { + rowDependency.recordIndex = -1; + rowDependency.contextRows = new ArrayList(); + for 
(CellDependency cd : rowDependency.cellDependencies) { + if (cd != null) { + rowDependency.contextRows.add(cd.rowIndex); + } + } + Collections.sort(rowDependency.contextRows); + } else { + rowDependency.recordIndex = recordIndex++; + } + + _rowDependencies.add(rowDependency); + } + + _records = new ArrayList(recordIndex); + if (recordIndex > 0) { + recordIndex = 0; + + int recordRowIndex = 0; + for (int r = 1; r < rowCount; r++) { + RowDependency rd = _rowDependencies.get(r); + if (rd.recordIndex >= 0) { + _records.add(new Record(recordRowIndex, r, recordIndex++)); + + recordIndex = rd.recordIndex; + recordRowIndex = r; + } + } + + _records.add(new Record(recordRowIndex, rowCount, recordIndex++)); + } + } + } + + protected List computeKeyedGroups(ColumnModel columnModel) { + List keyedGroups = new ArrayList(); + + addRootKeyedGroup(columnModel, keyedGroups); + + for (ColumnGroup group : columnModel.columnGroups) { + if (group.keyColumnIndex >= 0) { + KeyedGroup keyedGroup = new KeyedGroup(); + keyedGroup.keyCellIndex = columnModel.columns.get(group.keyColumnIndex).getCellIndex(); + keyedGroup.cellIndices = new int[group.columnSpan - 1]; + + int c = 0; + for (int i = 0; i < group.columnSpan; i++) { + int columnIndex = group.startColumnIndex + i; + if (columnIndex != group.keyColumnIndex && columnIndex < columnModel.columns.size()) { + int cellIndex = columnModel.columns.get(columnIndex).getCellIndex(); + keyedGroup.cellIndices[c++] = cellIndex; + } + } + + keyedGroups.add(keyedGroup); + } + } + + Collections.sort(keyedGroups, new Comparator() { + public int compare(KeyedGroup o1, KeyedGroup o2) { + return o2.cellIndices.length - o1.cellIndices.length; // larger groups first + } + }); + + return keyedGroups; + } + + protected void addRootKeyedGroup(ColumnModel columnModel, List keyedGroups) { + int count = columnModel.getMaxCellIndex() + 1; + if (count > 0 && columnModel.getKeyColumnIndex() < columnModel.columns.size()) { + KeyedGroup rootKeyedGroup = new 
KeyedGroup(); + + rootKeyedGroup.cellIndices = new int[count - 1]; + rootKeyedGroup.keyCellIndex = columnModel.columns.get(columnModel.getKeyColumnIndex()).getCellIndex(); + + for (int i = 0; i < count; i++) { + if (i < rootKeyedGroup.keyCellIndex) { + rootKeyedGroup.cellIndices[i] = i; + } else if (i > rootKeyedGroup.keyCellIndex) { + rootKeyedGroup.cellIndices[i - 1] = i; + } + } + keyedGroups.add(rootKeyedGroup); + } + } + + protected void setRowDependency( + Project project, + RowDependency rowDependency, + int cellIndex, + int contextRowIndex, + int contextCellIndex + ) { + if (rowDependency.cellDependencies == null) { + int count = project.columnModel.getMaxCellIndex() + 1; + + rowDependency.cellDependencies = new CellDependency[count]; + } + + rowDependency.cellDependencies[cellIndex] = + new CellDependency(contextRowIndex, contextCellIndex); + } + +} diff --git a/main/src/com/google/refine/model/Row.java b/main/src/com/google/refine/model/Row.java new file mode 100644 index 000000000..18959b458 --- /dev/null +++ b/main/src/com/google/refine/model/Row.java @@ -0,0 +1,201 @@ +package com.google.refine.model; + +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +import java.util.Map.Entry; + +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonParser; +import org.codehaus.jackson.JsonToken; +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.expr.CellTuple; +import com.google.refine.expr.HasFields; +import com.google.refine.util.Pool; + +public class Row implements HasFields, Jsonizable { + public boolean flagged; + public boolean starred; + final public List cells; + + private static final String FLAGGED = "flagged"; + private static final String STARRED = "starred"; + + public Row(int cellCount) { + cells = new ArrayList(cellCount); + } + + protected Row(List cells, boolean flagged, boolean starred) { + 
this.cells = cells; + this.flagged = flagged; + this.starred = starred; + } + + public Row dup() { + Row row = new Row(cells.size()); + row.flagged = flagged; + row.starred = starred; + row.cells.addAll(cells); + return row; + } + + public Object getField(String name, Properties bindings) { + if (FLAGGED.equals(name)) { + return flagged; + } else if (STARRED.equals(name)) { + return starred; + } + return null; + } + + public boolean fieldAlsoHasFields(String name) { + return "cells".equals(name) || "record".equals(name); + } + + public boolean isEmpty() { + for (Cell cell : cells) { + if (cell != null && cell.value != null && !isValueBlank(cell.value)) { + return false; + } + } + return true; + } + + public Cell getCell(int cellIndex) { + if (cellIndex >= 0 && cellIndex < cells.size()) { + return cells.get(cellIndex); + } else { + return null; + } + } + + public Object getCellValue(int cellIndex) { + if (cellIndex >= 0 && cellIndex < cells.size()) { + Cell cell = cells.get(cellIndex); + if (cell != null) { + return cell.value; + } + } + return null; + } + + public boolean isCellBlank(int cellIndex) { + return isValueBlank(getCellValue(cellIndex)); + } + + protected boolean isValueBlank(Object value) { + return value == null || (value instanceof String && ((String) value).trim().length() == 0); + } + + public void setCell(int cellIndex, Cell cell) { + if (cellIndex < cells.size()) { + cells.set(cellIndex, cell); + } else { + while (cellIndex > cells.size()) { + cells.add(null); + } + cells.add(cell); + } + } + + public CellTuple getCellTuple(Project project) { + return new CellTuple(project, this); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key(FLAGGED); writer.value(flagged); + writer.key(STARRED); writer.value(starred); + + writer.key("cells"); writer.array(); + for (Cell cell : cells) { + if (cell != null) { + cell.write(writer, options); + } else { + writer.value(null); + } + } + 
writer.endArray(); + + if (!"save".equals(options.getProperty("mode"))) { + if (options.containsKey("rowIndex")) { + int rowIndex = (Integer) options.get("rowIndex"); + writer.key("i"); writer.value(rowIndex); + + if (options.containsKey("recordIndex")) { + int recordIndex = (Integer) options.get("recordIndex"); + + writer.key("j"); writer.value(recordIndex); + } + } + + if (options.containsKey("extra")) { + Properties extra = (Properties) options.get("extra"); + if (extra != null) { + for (Entry e : extra.entrySet()) { + writer.key((String) e.getKey()); + writer.value(e.getValue()); + } + } + } + } + + writer.endObject(); + } + + public void save(Writer writer, Properties options) { + JSONWriter jsonWriter = new JSONWriter(writer); + try { + write(jsonWriter, options); + } catch (JSONException e) { + e.printStackTrace(); + } + } + + static public Row load(String s, Pool pool) throws Exception { + return s.length() == 0 ? null : + loadStreaming(s, pool); + } + + static public Row loadStreaming(String s, Pool pool) throws Exception { + JsonFactory jsonFactory = new JsonFactory(); + JsonParser jp = jsonFactory.createJsonParser(s); + + if (jp.nextToken() != JsonToken.START_OBJECT) { + return null; + } + + List cells = new ArrayList(); + boolean starred = false; + boolean flagged = false; + + while (jp.nextToken() != JsonToken.END_OBJECT) { + String fieldName = jp.getCurrentName(); + jp.nextToken(); + + if (STARRED.equals(fieldName)) { + starred = jp.getBooleanValue(); + } else if (FLAGGED.equals(fieldName)) { + flagged = jp.getBooleanValue(); + } else if ("cells".equals(fieldName)) { + if (jp.getCurrentToken() != JsonToken.START_ARRAY) { + return null; + } + + while (jp.nextToken() != JsonToken.END_ARRAY) { + Cell cell = Cell.loadStreaming(jp, pool); + + cells.add(cell); + } + } + } + + return (cells.size() > 0) ? 
new Row(cells, flagged, starred) : null; + } +} diff --git a/main/src/com/google/refine/model/changes/CellAtRow.java b/main/src/com/google/refine/model/changes/CellAtRow.java new file mode 100644 index 000000000..c07acb39b --- /dev/null +++ b/main/src/com/google/refine/model/changes/CellAtRow.java @@ -0,0 +1,35 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.Writer; +import java.util.Properties; + +import com.google.refine.model.Cell; +import com.google.refine.util.Pool; + +public class CellAtRow { + + final public int row; + final public Cell cell; + + public CellAtRow(int row, Cell cell) { + this.row = row; + this.cell = cell; + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write(Integer.toString(row)); + writer.write(';'); + if (cell != null) { + cell.save(writer, options); + } + } + + static public CellAtRow load(String s, Pool pool) throws Exception { + int semicolon = s.indexOf(';'); + int row = Integer.parseInt(s.substring(0, semicolon)); + Cell cell = semicolon < s.length() - 1 ? 
Cell.loadStreaming(s.substring(semicolon + 1), pool) : null; + + return new CellAtRow(row, cell); + } +} diff --git a/main/src/com/google/refine/model/changes/CellChange.java b/main/src/com/google/refine/model/changes/CellChange.java new file mode 100644 index 000000000..07792d87d --- /dev/null +++ b/main/src/com/google/refine/model/changes/CellChange.java @@ -0,0 +1,82 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.util.Pool; + +public class CellChange implements Change { + final public int row; + final public int cellIndex; + final public Cell oldCell; + final public Cell newCell; + + public CellChange(int row, int cellIndex, Cell oldCell, Cell newCell) { + this.row = row; + this.cellIndex = cellIndex; + this.oldCell = oldCell; + this.newCell = newCell; + } + + public void apply(Project project) { + project.rows.get(row).setCell(cellIndex, newCell); + + project.columnModel.getColumnByCellIndex(cellIndex).clearPrecomputes(); + } + + public void revert(Project project) { + project.rows.get(row).setCell(cellIndex, oldCell); + + project.columnModel.getColumnByCellIndex(cellIndex).clearPrecomputes(); + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("row="); writer.write(Integer.toString(row)); writer.write('\n'); + writer.write("cell="); writer.write(Integer.toString(cellIndex)); writer.write('\n'); + + writer.write("old="); + if (oldCell != null) { + oldCell.save(writer, options); // one liner + } + writer.write('\n'); + + writer.write("new="); + if (newCell != null) { + newCell.save(writer, options); // one liner + } + writer.write('\n'); + + writer.write("/ec/\n"); // end of change marker + } + + static public CellChange load(LineNumberReader reader, Pool 
pool) throws Exception { + int row = -1; + int cellIndex = -1; + Cell oldCell = null; + Cell newCell = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("row".equals(field)) { + row = Integer.parseInt(value); + } else if ("cell".equals(field)) { + cellIndex = Integer.parseInt(value); + } else if ("new".equals(field) && value.length() > 0) { + newCell = Cell.loadStreaming(value, pool); + } else if ("old".equals(field) && value.length() > 0) { + oldCell = Cell.loadStreaming(value, pool); + } + } + + return new CellChange(row, cellIndex, oldCell, newCell); + } +} diff --git a/main/src/com/google/refine/model/changes/ColumnAdditionChange.java b/main/src/com/google/refine/model/changes/ColumnAdditionChange.java new file mode 100644 index 000000000..b65413f9a --- /dev/null +++ b/main/src/com/google/refine/model/changes/ColumnAdditionChange.java @@ -0,0 +1,109 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.Pool; + +public class ColumnAdditionChange extends ColumnChange { + final protected String _columnName; + final protected int _columnIndex; + final protected CellAtRow[] _newCells; + protected int _newCellIndex = -1; + + public ColumnAdditionChange(String columnName, int columnIndex, List newCells) { + _columnName = columnName; + _columnIndex = columnIndex; + _newCells = new CellAtRow[newCells.size()]; + newCells.toArray(_newCells); + } + + public void apply(Project project) { + synchronized (project) { + if (_newCellIndex < 0) { + 
_newCellIndex = project.columnModel.allocateNewCellIndex(); + } + + Column column = new Column(_newCellIndex, _columnName); + + project.columnModel.columns.add(_columnIndex, column); + try { + for (CellAtRow cell : _newCells) { + project.rows.get(cell.row).setCell(_newCellIndex, cell.cell); + } + } catch (Exception e) { + e.printStackTrace(); + } + project.update(); + } + } + + public void revert(Project project) { + synchronized (project) { + for (CellAtRow cell : _newCells) { + Row row = project.rows.get(cell.row); + row.setCell(_newCellIndex, null); + } + + project.columnModel.columns.remove(_columnIndex); + + project.update(); + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("columnName="); writer.write(_columnName); writer.write('\n'); + writer.write("columnIndex="); writer.write(Integer.toString(_columnIndex)); writer.write('\n'); + writer.write("newCellIndex="); writer.write(Integer.toString(_newCellIndex)); writer.write('\n'); + writer.write("newCellCount="); writer.write(Integer.toString(_newCells.length)); writer.write('\n'); + for (CellAtRow c : _newCells) { + c.save(writer, options); + writer.write('\n'); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + String columnName = null; + int columnIndex = -1; + int newCellIndex = -1; + List newCells = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + + if ("columnName".equals(field)) { + columnName = line.substring(equal + 1); + } else if ("columnIndex".equals(field)) { + columnIndex = Integer.parseInt(line.substring(equal + 1)); + } else if ("newCellIndex".equals(field)) { + newCellIndex = Integer.parseInt(line.substring(equal + 1)); + } else if ("newCellCount".equals(field)) { + int newCellCount = Integer.parseInt(line.substring(equal + 
1)); + + newCells = new ArrayList(newCellCount); + for (int i = 0; i < newCellCount; i++) { + line = reader.readLine(); + if (line != null) { + newCells.add(CellAtRow.load(line, pool)); + } + } + } + } + + ColumnAdditionChange change = new ColumnAdditionChange(columnName, columnIndex, newCells); + change._newCellIndex = newCellIndex; + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/ColumnChange.java b/main/src/com/google/refine/model/changes/ColumnChange.java new file mode 100644 index 000000000..935da2a2d --- /dev/null +++ b/main/src/com/google/refine/model/changes/ColumnChange.java @@ -0,0 +1,6 @@ +package com.google.refine.model.changes; + +import com.google.refine.history.Change; + +abstract public class ColumnChange implements Change { +} diff --git a/main/src/com/google/refine/model/changes/ColumnMoveChange.java b/main/src/com/google/refine/model/changes/ColumnMoveChange.java new file mode 100644 index 000000000..58e728d3c --- /dev/null +++ b/main/src/com/google/refine/model/changes/ColumnMoveChange.java @@ -0,0 +1,75 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.util.Pool; + +public class ColumnMoveChange extends ColumnChange { + final protected String _columnName; + final protected int _newColumnIndex; + protected int _oldColumnIndex; + + public ColumnMoveChange(String columnName, int index) { + _columnName = columnName; + _newColumnIndex = index; + } + + public void apply(Project project) { + synchronized (project) { + _oldColumnIndex = project.columnModel.getColumnIndexByName(_columnName); + + Column column = project.columnModel.columns.remove(_oldColumnIndex); + project.columnModel.columns.add(_newColumnIndex, column); + + project.update(); + } + } + + public 
void revert(Project project) { + synchronized (project) { + Column column = project.columnModel.columns.remove(_newColumnIndex); + project.columnModel.columns.add(_oldColumnIndex, column); + + project.update(); + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("columnName="); writer.write(_columnName); writer.write('\n'); + writer.write("oldColumnIndex="); writer.write(Integer.toString(_oldColumnIndex)); writer.write('\n'); + writer.write("newColumnIndex="); writer.write(Integer.toString(_newColumnIndex)); writer.write('\n'); + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + String columnName = null; + int oldColumnIndex = -1; + int newColumnIndex = -1; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + + String value = line.substring(equal + 1); + if ("oldColumnIndex".equals(field)) { + oldColumnIndex = Integer.parseInt(value); + } else if ("newColumnIndex".equals(field)) { + newColumnIndex = Integer.parseInt(value); + } else if ("columnName".equals(field)) { + columnName = value; + } + } + + ColumnMoveChange change = new ColumnMoveChange(columnName, newColumnIndex); + change._oldColumnIndex = oldColumnIndex; + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/ColumnRemovalChange.java b/main/src/com/google/refine/model/changes/ColumnRemovalChange.java new file mode 100644 index 000000000..fbb6f33f1 --- /dev/null +++ b/main/src/com/google/refine/model/changes/ColumnRemovalChange.java @@ -0,0 +1,103 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; 
+import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.Pool; + +public class ColumnRemovalChange extends ColumnChange { + final protected int _oldColumnIndex; + protected Column _oldColumn; + protected CellAtRow[] _oldCells; + + public ColumnRemovalChange(int index) { + _oldColumnIndex = index; + } + + public void apply(Project project) { + synchronized (project) { + _oldColumn = project.columnModel.columns.remove(_oldColumnIndex); + _oldCells = new CellAtRow[project.rows.size()]; + + int cellIndex = _oldColumn.getCellIndex(); + for (int i = 0; i < _oldCells.length; i++) { + Row row = project.rows.get(i); + + Cell oldCell = null; + if (cellIndex < row.cells.size()) { + oldCell = row.cells.get(cellIndex); + } + _oldCells[i] = new CellAtRow(i, oldCell); + + row.setCell(cellIndex, null); + } + + project.update(); + } + } + + public void revert(Project project) { + synchronized (project) { + project.columnModel.columns.add(_oldColumnIndex, _oldColumn); + + int cellIndex = _oldColumn.getCellIndex(); + for (CellAtRow cell : _oldCells) { + project.rows.get(cell.row).cells.set(cellIndex, cell.cell); + } + + project.update(); + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("oldColumnIndex="); writer.write(Integer.toString(_oldColumnIndex)); writer.write('\n'); + writer.write("oldColumn="); _oldColumn.save(writer); writer.write('\n'); + writer.write("oldCellCount="); writer.write(Integer.toString(_oldCells.length)); writer.write('\n'); + for (CellAtRow c : _oldCells) { + c.save(writer, options); + writer.write('\n'); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + int oldColumnIndex = -1; + Column oldColumn = null; + CellAtRow[] oldCells = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + 
CharSequence field = line.subSequence(0, equal); + + if ("oldColumnIndex".equals(field)) { + oldColumnIndex = Integer.parseInt(line.substring(equal + 1)); + } else if ("oldColumn".equals(field)) { + oldColumn = Column.load(line.substring(equal + 1)); + } else if ("oldCellCount".equals(field)) { + int oldCellCount = Integer.parseInt(line.substring(equal + 1)); + + oldCells = new CellAtRow[oldCellCount]; + for (int i = 0; i < oldCellCount; i++) { + line = reader.readLine(); + if (line != null) { + oldCells[i] = CellAtRow.load(line, pool); + } + } + } + } + + ColumnRemovalChange change = new ColumnRemovalChange(oldColumnIndex); + change._oldColumn = oldColumn; + change._oldCells = oldCells; + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/ColumnRenameChange.java b/main/src/com/google/refine/model/changes/ColumnRenameChange.java new file mode 100644 index 000000000..66278947c --- /dev/null +++ b/main/src/com/google/refine/model/changes/ColumnRenameChange.java @@ -0,0 +1,62 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Project; +import com.google.refine.util.Pool; + +public class ColumnRenameChange extends ColumnChange { + final protected String _oldColumnName; + final protected String _newColumnName; + + public ColumnRenameChange(String oldColumnName, String newColumnName) { + _oldColumnName = oldColumnName; + _newColumnName = newColumnName; + } + + public void apply(Project project) { + synchronized (project) { + project.columnModel.getColumnByName(_oldColumnName).setName(_newColumnName); + project.columnModel.update(); + } + } + + public void revert(Project project) { + synchronized (project) { + project.columnModel.getColumnByName(_newColumnName).setName(_oldColumnName); + project.columnModel.update(); + } + } + + public void save(Writer writer, 
Properties options) throws IOException { + writer.write("oldColumnName="); writer.write(_oldColumnName); writer.write('\n'); + writer.write("newColumnName="); writer.write(_newColumnName); writer.write('\n'); + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + String oldColumnName = null; + String newColumnName = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("oldColumnName".equals(field)) { + oldColumnName = value; + } else if ("newColumnName".equals(field)) { + newColumnName = value; + } + } + + ColumnRenameChange change = new ColumnRenameChange(oldColumnName, newColumnName); + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/ColumnReorderChange.java b/main/src/com/google/refine/model/changes/ColumnReorderChange.java new file mode 100644 index 000000000..e3491da80 --- /dev/null +++ b/main/src/com/google/refine/model/changes/ColumnReorderChange.java @@ -0,0 +1,114 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.util.Pool; + +public class ColumnReorderChange extends ColumnChange { + final protected List _columnNames; + protected List _oldColumns; + protected List _newColumns; + + public ColumnReorderChange(List columnNames) { + _columnNames = columnNames; + } + + public void apply(Project project) { + synchronized (project) { + if (_newColumns == null) { + _newColumns = new ArrayList(); + _oldColumns = new ArrayList(project.columnModel.columns); + + for 
(String n : _columnNames) { + Column column = project.columnModel.getColumnByName(n); + if (column != null) { + _newColumns.add(column); + } + } + } + + project.columnModel.columns.clear(); + project.columnModel.columns.addAll(_newColumns); + project.update(); + } + } + + public void revert(Project project) { + synchronized (project) { + project.columnModel.columns.clear(); + project.columnModel.columns.addAll(_oldColumns); + project.update(); + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n'); + for (String n : _columnNames) { + writer.write(n); + writer.write('\n'); + } + writer.write("oldColumnCount="); writer.write(Integer.toString(_oldColumns.size())); writer.write('\n'); + for (Column c : _oldColumns) { + c.save(writer); + writer.write('\n'); + } + writer.write("newColumnCount="); writer.write(Integer.toString(_newColumns.size())); writer.write('\n'); + for (Column c : _newColumns) { + c.save(writer); + writer.write('\n'); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + List columnNames = new ArrayList(); + List oldColumns = new ArrayList(); + List newColumns = new ArrayList(); + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + + if ("columnNameCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + columnNames.add(line); + } + } + } else if ("oldColumnCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + oldColumns.add(Column.load(line)); + } + } + } else if 
("newColumnCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + newColumns.add(Column.load(line)); + } + } + } + } + + ColumnReorderChange change = new ColumnReorderChange(columnNames); + change._oldColumns = oldColumns; + change._newColumns = newColumns; + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/ColumnSplitChange.java b/main/src/com/google/refine/model/changes/ColumnSplitChange.java new file mode 100644 index 000000000..4574a6068 --- /dev/null +++ b/main/src/com/google/refine/model/changes/ColumnSplitChange.java @@ -0,0 +1,329 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Serializable; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONObject; +import org.json.JSONTokener; + +import com.google.refine.history.Change; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.Pool; + +public class ColumnSplitChange implements Change { + final protected String _columnName; + + final protected List _columnNames; + final protected List _rowIndices; + final protected List> _tuples; + + final protected boolean _removeOriginalColumn; + + protected Column _column; + protected int _columnIndex; + + protected int _firstNewCellIndex = -1; + protected List _oldRows; + protected List _newRows; + + public ColumnSplitChange( + String columnName, + List columnNames, + List rowIndices, + List> tuples, + boolean removeOriginalColumn + ) { + _columnName = columnName; + + _columnNames = columnNames; + _rowIndices = rowIndices; + _tuples = tuples; + + _removeOriginalColumn = removeOriginalColumn; + } + + protected ColumnSplitChange( + String 
columnName, + List columnNames, + List rowIndices, + List> tuples, + boolean removeOriginalColumn, + + Column column, + int columnIndex, + + int firstNewCellIndex, + List oldRows, + List newRows + ) { + _columnName = columnName; + + _columnNames = columnNames; + _rowIndices = rowIndices; + _tuples = tuples; + + _removeOriginalColumn = removeOriginalColumn; + + _column = column; + _columnIndex = columnIndex; + + _firstNewCellIndex = firstNewCellIndex; + _oldRows = oldRows; + _newRows = newRows; + } + + public void apply(Project project) { + synchronized (project) { + if (_firstNewCellIndex < 0) { + _firstNewCellIndex = project.columnModel.allocateNewCellIndex(); + for (int i = 1; i < _columnNames.size(); i++) { + project.columnModel.allocateNewCellIndex(); + } + + _column = project.columnModel.getColumnByName(_columnName); + _columnIndex = project.columnModel.getColumnIndexByName(_columnName); + + _oldRows = new ArrayList(_rowIndices.size()); + _newRows = new ArrayList(_rowIndices.size()); + + int cellIndex = _column.getCellIndex(); + + for (int i = 0; i < _rowIndices.size(); i++) { + int r = _rowIndices.get(i); + List tuple = _tuples.get(i); + + Row oldRow = project.rows.get(r); + Row newRow = oldRow.dup(); + + _oldRows.add(oldRow); + _newRows.add(newRow); + + for (int c = 0; c < tuple.size(); c++) { + Serializable value = tuple.get(c); + if (value != null) { + newRow.setCell(_firstNewCellIndex + c, new Cell(value, null)); + } + } + + if (_removeOriginalColumn) { + newRow.setCell(cellIndex, null); + } + } + } + + for (int i = 0; i < _rowIndices.size(); i++) { + int r = _rowIndices.get(i); + Row newRow = _newRows.get(i); + + project.rows.set(r, newRow); + } + + for (int i = 0; i < _columnNames.size(); i++) { + String name = _columnNames.get(i); + int cellIndex = _firstNewCellIndex + i; + + Column column = new Column(cellIndex, name); + + project.columnModel.columns.add(_columnIndex + 1 + i, column); + } + + if (_removeOriginalColumn) { + 
project.columnModel.columns.remove(_columnIndex); + } + + project.update(); + } + } + + public void revert(Project project) { + synchronized (project) { + for (int i = 0; i < _rowIndices.size(); i++) { + int r = _rowIndices.get(i); + Row oldRow = _oldRows.get(i); + + project.rows.set(r, oldRow); + } + + if (_removeOriginalColumn) { + project.columnModel.columns.add(_columnIndex, _column); + } + + for (int i = 0; i < _columnNames.size(); i++) { + project.columnModel.columns.remove(_columnIndex + 1); + } + + project.update(); + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("columnName="); writer.write(_columnName); writer.write('\n'); + + writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n'); + for (String name : _columnNames) { + writer.write(name); writer.write('\n'); + } + writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n'); + for (Integer rowIndex : _rowIndices) { + writer.write(rowIndex.toString()); writer.write('\n'); + } + writer.write("tupleCount="); writer.write(Integer.toString(_tuples.size())); writer.write('\n'); + for (List tuple : _tuples) { + writer.write(Integer.toString(tuple.size())); writer.write('\n'); + + for (Serializable value : tuple) { + if (value == null) { + writer.write("null"); + } else if (value instanceof String) { + writer.write(JSONObject.quote((String) value)); + } else { + writer.write(value.toString()); + } + writer.write('\n'); + } + } + writer.write("removeOriginalColumn="); writer.write(Boolean.toString(_removeOriginalColumn)); writer.write('\n'); + + writer.write("column="); _column.save(writer); writer.write('\n'); + writer.write("columnIndex="); writer.write(Integer.toString(_columnIndex)); writer.write('\n'); + + writer.write("firstNewCellIndex="); writer.write(Integer.toString(_firstNewCellIndex)); writer.write('\n'); + + writer.write("newRowCount="); 
writer.write(Integer.toString(_newRows.size())); writer.write('\n'); + for (Row row : _newRows) { + row.save(writer, options); + writer.write('\n'); + } + writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n'); + for (Row row : _oldRows) { + row.save(writer, options); + writer.write('\n'); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + String columnName = null; + List columnNames = null; + List rowIndices = null; + List> tuples = null; + boolean removeOriginalColumn = false; + + Column column = null; + int columnIndex = -1; + + int firstNewCellIndex = -1; + List oldRows = null; + List newRows = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("columnName".equals(field)) { + columnName = value; + } else if ("columnNameCount".equals(field)) { + int count = Integer.parseInt(value); + + columnNames = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + columnNames.add(line); + } + } + } else if ("rowIndexCount".equals(field)) { + int count = Integer.parseInt(value); + + rowIndices = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + rowIndices.add(Integer.parseInt(line)); + } + } + } else if ("tupleCount".equals(field)) { + int count = Integer.parseInt(value); + + tuples = new ArrayList>(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + + if (line == null) continue; + + int valueCount = Integer.parseInt(line); + + List tuple = new ArrayList(valueCount); + for (int r = 0; r < valueCount; r++) { + line = reader.readLine(); + + JSONTokener t = new JSONTokener(line); + Object o = t.nextValue(); + + tuple.add((o != 
JSONObject.NULL) ? (Serializable) o : null); + } + + tuples.add(tuple); + } + } else if ("removeOriginalColumn".equals(field)) { + removeOriginalColumn = Boolean.parseBoolean(value); + + } else if ("column".equals(field)) { + column = Column.load(value); + } else if ("columnIndex".equals(field)) { + columnIndex = Integer.parseInt(value); + } else if ("firstNewCellIndex".equals(field)) { + firstNewCellIndex = Integer.parseInt(value); + } else if ("oldRowCount".equals(field)) { + int count = Integer.parseInt(value); + + oldRows = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + oldRows.add(Row.load(line, pool)); + } + } + } else if ("newRowCount".equals(field)) { + int count = Integer.parseInt(value); + + newRows = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + newRows.add(Row.load(line, pool)); + } + } + } + + } + + ColumnSplitChange change = new ColumnSplitChange( + columnName, + columnNames, + rowIndices, + tuples, + removeOriginalColumn, + + column, + columnIndex, + + firstNewCellIndex, + oldRows, + newRows + ); + + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/DataExtensionChange.java b/main/src/com/google/refine/model/changes/DataExtensionChange.java new file mode 100644 index 000000000..43e9cb7a6 --- /dev/null +++ b/main/src/com/google/refine/model/changes/DataExtensionChange.java @@ -0,0 +1,431 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Serializable; +import java.io.Writer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.history.Change; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import 
com.google.refine.model.ModelException; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.ReconStats; +import com.google.refine.model.Row; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.model.recon.DataExtensionReconConfig; +import com.google.refine.protograph.FreebaseType; +import com.google.refine.util.ParsingUtilities; +import com.google.refine.util.Pool; +import com.google.refine.util.FreebaseDataExtensionJob.DataExtension; + +public class DataExtensionChange implements Change { + final protected String _baseColumnName; + final protected int _columnInsertIndex; + + final protected List _columnNames; + final protected List _columnTypes; + + final protected List _rowIndices; + final protected List _dataExtensions; + + protected long _historyEntryID; + protected int _firstNewCellIndex = -1; + protected List _oldRows; + protected List _newRows; + + public DataExtensionChange( + String baseColumnName, + int columnInsertIndex, + List columnNames, + List columnTypes, + List rowIndices, + List dataExtensions, + long historyEntryID + ) { + _baseColumnName = baseColumnName; + _columnInsertIndex = columnInsertIndex; + + _columnNames = columnNames; + _columnTypes = columnTypes; + + _rowIndices = rowIndices; + _dataExtensions = dataExtensions; + + _historyEntryID = historyEntryID; + } + + protected DataExtensionChange( + String baseColumnName, + int columnInsertIndex, + + List columnNames, + List columnTypes, + + List rowIndices, + List dataExtensions, + int firstNewCellIndex, + List oldRows, + List newRows + ) { + _baseColumnName = baseColumnName; + _columnInsertIndex = columnInsertIndex; + + _columnNames = columnNames; + _columnTypes = columnTypes; + + _rowIndices = rowIndices; + _dataExtensions = dataExtensions; + + _firstNewCellIndex = firstNewCellIndex; + _oldRows = oldRows; + _newRows = newRows; + } + + public void apply(Project 
project) { + synchronized (project) { + if (_firstNewCellIndex < 0) { + _firstNewCellIndex = project.columnModel.allocateNewCellIndex(); + for (int i = 1; i < _columnNames.size(); i++) { + project.columnModel.allocateNewCellIndex(); + } + + _oldRows = new ArrayList(project.rows); + + _newRows = new ArrayList(project.rows.size()); + + int cellIndex = project.columnModel.getColumnByName(_baseColumnName).getCellIndex(); + int keyCellIndex = project.columnModel.columns.get(project.columnModel.getKeyColumnIndex()).getCellIndex(); + int index = 0; + + int rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size(); + DataExtension dataExtension = index < _rowIndices.size() ? _dataExtensions.get(index) : null; + + index++; + + Map reconMap = new HashMap(); + + for (int r = 0; r < _oldRows.size(); r++) { + Row oldRow = _oldRows.get(r); + if (r < rowIndex) { + _newRows.add(oldRow.dup()); + continue; + } + + if (dataExtension == null || dataExtension.data.length == 0) { + _newRows.add(oldRow); + } else { + Row firstNewRow = oldRow.dup(); + extendRow(firstNewRow, dataExtension, 0, reconMap); + _newRows.add(firstNewRow); + + int r2 = r + 1; + for (int subR = 1; subR < dataExtension.data.length; subR++) { + if (r2 < project.rows.size()) { + Row oldRow2 = project.rows.get(r2); + if (oldRow2.isCellBlank(cellIndex) && + oldRow2.isCellBlank(keyCellIndex)) { + + Row newRow = oldRow2.dup(); + extendRow(newRow, dataExtension, subR, reconMap); + + _newRows.add(newRow); + r2++; + + continue; + } + } + + Row newRow = new Row(cellIndex + _columnNames.size()); + extendRow(newRow, dataExtension, subR, reconMap); + + _newRows.add(newRow); + } + + r = r2 - 1; // r will be incremented by the for loop anyway + } + + rowIndex = index < _rowIndices.size() ? _rowIndices.get(index) : _oldRows.size(); + dataExtension = index < _rowIndices.size() ? 
_dataExtensions.get(index) : null; + index++; + } + } + + project.rows.clear(); + project.rows.addAll(_newRows); + + for (int i = 0; i < _columnNames.size(); i++) { + String name = _columnNames.get(i); + int cellIndex = _firstNewCellIndex + i; + + Column column = new Column(cellIndex, name); + column.setReconConfig(new DataExtensionReconConfig(_columnTypes.get(i))); + column.setReconStats(ReconStats.create(project, cellIndex)); + + try { + project.columnModel.addColumn(_columnInsertIndex + i, column, true); + + // the column might have been renamed to avoid collision + _columnNames.set(i, column.getName()); + } catch (ModelException e) { + // won't get here since we set the avoid collision flag + } + } + + project.update(); + } + } + + protected void extendRow( + Row row, + DataExtension dataExtension, + int extensionRowIndex, + Map reconMap + ) { + Object[] values = dataExtension.data[extensionRowIndex]; + for (int c = 0; c < values.length; c++) { + Object value = values[c]; + Cell cell = null; + + if (value instanceof ReconCandidate) { + ReconCandidate rc = (ReconCandidate) value; + Recon recon; + if (reconMap.containsKey(rc.id)) { + recon = reconMap.get(rc.id); + } else { + recon = Recon.makeFreebaseRecon(_historyEntryID); + recon.addCandidate(rc); + recon.service = "mql"; + recon.match = rc; + recon.matchRank = 0; + recon.judgment = Judgment.Matched; + recon.judgmentAction = "auto"; + recon.judgmentBatchSize = 1; + + reconMap.put(rc.id, recon); + } + cell = new Cell(rc.name, recon); + } else { + cell = new Cell((Serializable) value, null); + } + + row.setCell(_firstNewCellIndex + c, cell); + } + } + + public void revert(Project project) { + synchronized (project) { + project.rows.clear(); + project.rows.addAll(_oldRows); + + for (int i = 0; i < _columnNames.size(); i++) { + project.columnModel.columns.remove(_columnInsertIndex); + } + + project.update(); + } + } + + public void save(Writer writer, Properties options) throws IOException { + 
writer.write("baseColumnName="); writer.write(_baseColumnName); writer.write('\n'); + writer.write("columnInsertIndex="); writer.write(Integer.toString(_columnInsertIndex)); writer.write('\n'); + writer.write("columnNameCount="); writer.write(Integer.toString(_columnNames.size())); writer.write('\n'); + for (String name : _columnNames) { + writer.write(name); writer.write('\n'); + } + writer.write("columnTypeCount="); writer.write(Integer.toString(_columnTypes.size())); writer.write('\n'); + for (FreebaseType type : _columnTypes) { + try { + JSONWriter jsonWriter = new JSONWriter(writer); + + type.write(jsonWriter, options); + } catch (JSONException e) { + // ??? + } + writer.write('\n'); + } + writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n'); + for (Integer rowIndex : _rowIndices) { + writer.write(rowIndex.toString()); writer.write('\n'); + } + writer.write("dataExtensionCount="); writer.write(Integer.toString(_dataExtensions.size())); writer.write('\n'); + for (DataExtension dataExtension : _dataExtensions) { + if (dataExtension == null) { + writer.write('\n'); + continue; + } + + writer.write(Integer.toString(dataExtension.data.length)); writer.write('\n'); + + for (Object[] values : dataExtension.data) { + for (Object value : values) { + if (value == null) { + writer.write("null"); + } else if (value instanceof ReconCandidate) { + try { + JSONWriter jsonWriter = new JSONWriter(writer); + ((ReconCandidate) value).write(jsonWriter, options); + } catch (JSONException e) { + // ??? 
+ } + } else if (value instanceof String) { + writer.write(JSONObject.quote((String) value)); + } else { + writer.write(value.toString()); + } + writer.write('\n'); + } + } + } + + writer.write("firstNewCellIndex="); writer.write(Integer.toString(_firstNewCellIndex)); writer.write('\n'); + + writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n'); + for (Row row : _newRows) { + row.save(writer, options); + writer.write('\n'); + } + writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n'); + for (Row row : _oldRows) { + row.save(writer, options); + writer.write('\n'); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + String baseColumnName = null; + int columnInsertIndex = -1; + + List columnNames = null; + List columnTypes = null; + + List rowIndices = null; + List dataExtensions = null; + + List oldRows = null; + List newRows = null; + + int firstNewCellIndex = -1; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("baseColumnName".equals(field)) { + baseColumnName = value; + } else if ("columnInsertIndex".equals(field)) { + columnInsertIndex = Integer.parseInt(value); + } else if ("firstNewCellIndex".equals(field)) { + firstNewCellIndex = Integer.parseInt(value); + } else if ("rowIndexCount".equals(field)) { + int count = Integer.parseInt(value); + + rowIndices = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + rowIndices.add(Integer.parseInt(line)); + } + } + } else if ("columnNameCount".equals(field)) { + int count = Integer.parseInt(value); + + columnNames = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != 
null) { + columnNames.add(line); + } + } + } else if ("columnTypeCount".equals(field)) { + int count = Integer.parseInt(value); + + columnTypes = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + columnTypes.add(FreebaseType.load(ParsingUtilities.evaluateJsonStringToObject(line))); + } + } else if ("dataExtensionCount".equals(field)) { + int count = Integer.parseInt(value); + + dataExtensions = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + + if (line == null) continue; + + if (line.length() == 0) { + dataExtensions.add(null); + continue; + } + + int rowCount = Integer.parseInt(line); + Object[][] data = new Object[rowCount][]; + + for (int r = 0; r < rowCount; r++) { + Object[] row = new Object[columnNames.size()]; + for (int c = 0; c < columnNames.size(); c++) { + line = reader.readLine(); + + row[c] = ReconCandidate.loadStreaming(line); + } + + data[r] = row; + } + + dataExtensions.add(new DataExtension(data)); + } + } else if ("oldRowCount".equals(field)) { + int count = Integer.parseInt(value); + + oldRows = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + oldRows.add(Row.load(line, pool)); + } + } + } else if ("newRowCount".equals(field)) { + int count = Integer.parseInt(value); + + newRows = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + newRows.add(Row.load(line, pool)); + } + } + } + + } + + DataExtensionChange change = new DataExtensionChange( + baseColumnName, + columnInsertIndex, + columnNames, + columnTypes, + rowIndices, + dataExtensions, + firstNewCellIndex, + oldRows, + newRows + ); + + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/MassCellChange.java b/main/src/com/google/refine/model/changes/MassCellChange.java new file mode 100644 index 000000000..38de36087 --- /dev/null +++ 
b/main/src/com/google/refine/model/changes/MassCellChange.java @@ -0,0 +1,129 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.List; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.Pool; + +public class MassCellChange implements Change { + final protected CellChange[] _cellChanges; + final protected String _commonColumnName; + final protected boolean _updateRowContextDependencies; + + public MassCellChange( + CellChange[] cellChanges, + String commonColumnName, + boolean updateRowContextDependencies) { + + _cellChanges = cellChanges; + _commonColumnName = commonColumnName; + _updateRowContextDependencies = updateRowContextDependencies; + } + + public MassCellChange( + List cellChanges, + String commonColumnName, + boolean updateRowContextDependencies) { + + _cellChanges = new CellChange[cellChanges.size()]; + _commonColumnName = commonColumnName; + cellChanges.toArray(_cellChanges); + + _updateRowContextDependencies = updateRowContextDependencies; + } + + public MassCellChange(CellChange cellChange, String commonColumnName, boolean updateRowContextDependencies) { + _cellChanges = new CellChange[1]; + _cellChanges[0] = cellChange; + + _commonColumnName = commonColumnName; + + _updateRowContextDependencies = updateRowContextDependencies; + } + + public void apply(Project project) { + synchronized (project) { + List rows = project.rows; + + for (CellChange cellChange : _cellChanges) { + rows.get(cellChange.row).setCell(cellChange.cellIndex, cellChange.newCell); + } + + if (_commonColumnName != null) { + Column column = project.columnModel.getColumnByName(_commonColumnName); + column.clearPrecomputes(); + } + + if (_updateRowContextDependencies) { + project.update(); + } + } + } + + public void 
revert(Project project) { + synchronized (project) { + List rows = project.rows; + + for (CellChange cellChange : _cellChanges) { + rows.get(cellChange.row).setCell(cellChange.cellIndex, cellChange.oldCell); + } + + if (_commonColumnName != null) { + Column column = project.columnModel.getColumnByName(_commonColumnName); + column.clearPrecomputes(); + } + + if (_updateRowContextDependencies) { + project.update(); + } + } + } + + public void save(Writer writer, Properties options) throws IOException { + if (_commonColumnName != null) { + writer.write("commonColumnName="); writer.write(_commonColumnName); writer.write('\n'); + } + writer.write("updateRowContextDependencies="); writer.write(Boolean.toString(_updateRowContextDependencies)); writer.write('\n'); + writer.write("cellChangeCount="); writer.write(Integer.toString(_cellChanges.length)); writer.write('\n'); + for (CellChange c : _cellChanges) { + c.save(writer, options); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + String commonColumnName = null; + boolean updateRowContextDependencies = false; + CellChange[] cellChanges = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + + if ("commonColumnName".equals(field)) { + commonColumnName = line.substring(equal + 1); + } else if ("updateRowContextDependencies".equals(field)) { + updateRowContextDependencies = Boolean.parseBoolean(line.substring(equal + 1)); + } else if ("cellChangeCount".equals(field)) { + int cellChangeCount = Integer.parseInt(line.substring(equal + 1)); + + cellChanges = new CellChange[cellChangeCount]; + for (int i = 0; i < cellChangeCount; i++) { + cellChanges[i] = CellChange.load(reader, pool); + } + } + } + + MassCellChange change = new MassCellChange(cellChanges, commonColumnName, updateRowContextDependencies); + + 
return change; + } +} diff --git a/main/src/com/google/refine/model/changes/MassChange.java b/main/src/com/google/refine/model/changes/MassChange.java new file mode 100644 index 000000000..00dd07eb5 --- /dev/null +++ b/main/src/com/google/refine/model/changes/MassChange.java @@ -0,0 +1,82 @@ +package com.google.refine.model.changes; + + import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.history.History; +import com.google.refine.model.Project; +import com.google.refine.util.Pool; + +public class MassChange implements Change { + final protected List _changes; + final protected boolean _updateRowContextDependencies; + + public MassChange(List changes, boolean updateRowContextDependencies) { + _changes = changes; + _updateRowContextDependencies = updateRowContextDependencies; + } + + public void apply(Project project) { + synchronized (project) { + for (Change change : _changes) { + change.apply(project); + } + + if (_updateRowContextDependencies) { + project.update(); + } + } + } + + public void revert(Project project) { + synchronized (project) { + for (Change change : _changes) { + change.revert(project); + } + + if (_updateRowContextDependencies) { + project.update(); + } + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("updateRowContextDependencies="); writer.write(Boolean.toString(_updateRowContextDependencies)); writer.write('\n'); + writer.write("changeCount="); writer.write(Integer.toString(_changes.size())); writer.write('\n'); + for (Change c : _changes) { + History.writeOneChange(writer, c, options); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + boolean updateRowContextDependencies = false; + List changes = null; + + String 
line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + + if ("updateRowContextDependencies".equals(field)) { + updateRowContextDependencies = Boolean.parseBoolean(line.substring(equal + 1)); + } else if ("changeCount".equals(field)) { + int changeCount = Integer.parseInt(line.substring(equal + 1)); + + changes = new ArrayList(changeCount); + for (int i = 0; i < changeCount; i++) { + changes.add(History.readOneChange(reader, pool)); + } + } + } + + MassChange change = new MassChange(changes, updateRowContextDependencies); + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/MassReconChange.java b/main/src/com/google/refine/model/changes/MassReconChange.java new file mode 100644 index 000000000..edd3e5d71 --- /dev/null +++ b/main/src/com/google/refine/model/changes/MassReconChange.java @@ -0,0 +1,111 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.history.Change; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Row; +import com.google.refine.util.Pool; + +public class MassReconChange implements Change { + final protected Map _newRecons; + final protected Map _oldRecons; + + public MassReconChange(Map newRecons, Map oldRecons) { + _newRecons = newRecons; + _oldRecons = oldRecons; + } + + public void apply(Project project) { + switchRecons(project, _newRecons); + } + + public void revert(Project project) { + switchRecons(project, _oldRecons); + } + + protected void switchRecons(Project project, Map reconMap) { + synchronized (project) { + for (int r = 0; r < project.rows.size(); r++) { 
+ Row row = project.rows.get(r); + + for (int c = 0; c < row.cells.size(); c++) { + Cell cell = row.cells.get(c); + if (cell != null && cell.recon != null) { + Recon recon = cell.recon; + + if (reconMap.containsKey(recon.id)) { + row.setCell(c, new Cell(cell.value, reconMap.get(recon.id))); + } + } + } + } + } + } + + public void save(Writer writer, Properties options) throws IOException { + writeRecons(writer, options, _oldRecons, "oldReconCount"); + writeRecons(writer, options, _newRecons, "newReconCount"); + writer.write("/ec/\n"); // end of change marker + } + + protected void writeRecons(Writer writer, Properties options, Map recons, String key) throws IOException { + writer.write(key + "="); writer.write(Integer.toString(recons.size())); writer.write('\n'); + for (Recon recon : recons.values()) { + Pool pool = (Pool) options.get("pool"); + pool.poolReconCandidates(recon); + + JSONWriter jsonWriter = new JSONWriter(writer); + try { + recon.write(jsonWriter, options); + } catch (JSONException e) { + e.printStackTrace(); + } + writer.write("\n"); + } + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + Map oldRecons = new HashMap(); + Map newRecons = new HashMap(); + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("oldReconCount".equals(field)) { + loadRecons(reader, pool, oldRecons, value); + } else if ("newReconCount".equals(field)) { + loadRecons(reader, pool, newRecons, value); + } + } + + MassReconChange change = new MassReconChange(newRecons, oldRecons); + + return change; + } + + static protected void loadRecons(LineNumberReader reader, Pool pool, Map recons, String countString) throws Exception { + int count = Integer.parseInt(countString); + + for (int i = 0; i < count; i++) { + String line = reader.readLine(); + Recon recon = 
Recon.loadStreaming(line, pool); + + recons.put(recon.id, recon); + } + } +} + diff --git a/main/src/com/google/refine/model/changes/MassRowChange.java b/main/src/com/google/refine/model/changes/MassRowChange.java new file mode 100644 index 000000000..509d4e167 --- /dev/null +++ b/main/src/com/google/refine/model/changes/MassRowChange.java @@ -0,0 +1,93 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.Pool; + +public class MassRowChange implements Change { + final protected List _newRows; + protected List _oldRows; + + public MassRowChange(List newRows) { + _newRows = newRows; + } + + public void apply(Project project) { + synchronized (project) { + _oldRows = new ArrayList(project.rows); + project.rows.clear(); + project.rows.addAll(_newRows); + + project.update(); + } + } + + public void revert(Project project) { + synchronized (project) { + project.rows.clear(); + project.rows.addAll(_oldRows); + + project.update(); + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n'); + for (Row row : _newRows) { + row.save(writer, options); + writer.write('\n'); + } + writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n'); + for (Row row : _oldRows) { + row.save(writer, options); + writer.write('\n'); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + List oldRows = null; + List newRows = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = 
line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + + if ("oldRowCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + + oldRows = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + oldRows.add(Row.load(line, pool)); + } + } + } else if ("newRowCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + + newRows = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + newRows.add(Row.load(line, pool)); + } + } + } + } + + MassRowChange change = new MassRowChange(newRows); + change._oldRows = oldRows; + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/MassRowColumnChange.java b/main/src/com/google/refine/model/changes/MassRowColumnChange.java new file mode 100644 index 000000000..e8c9a2079 --- /dev/null +++ b/main/src/com/google/refine/model/changes/MassRowColumnChange.java @@ -0,0 +1,139 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.Pool; + +public class MassRowColumnChange implements Change { + final protected List _newColumns; + final protected List _newRows; + protected List _oldColumns; + protected List _oldRows; + + public MassRowColumnChange(List newColumns, List newRows) { + _newColumns = newColumns; + _newRows = newRows; + } + + public void apply(Project project) { + synchronized (project) { + _oldColumns = new ArrayList(project.columnModel.columns); + _oldRows = new ArrayList(project.rows); + + project.columnModel.columns.clear(); + project.columnModel.columns.addAll(_newColumns); + 
+ project.rows.clear(); + project.rows.addAll(_newRows); + + project.update(); + } + } + + public void revert(Project project) { + synchronized (project) { + project.columnModel.columns.clear(); + project.columnModel.columns.addAll(_oldColumns); + + project.rows.clear(); + project.rows.addAll(_oldRows); + + project.update(); + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("newColumnCount="); writer.write(Integer.toString(_newColumns.size())); writer.write('\n'); + for (Column column : _newColumns) { + column.save(writer); + writer.write('\n'); + } + writer.write("oldColumnCount="); writer.write(Integer.toString(_oldColumns.size())); writer.write('\n'); + for (Column column : _oldColumns) { + column.save(writer); + writer.write('\n'); + } + writer.write("newRowCount="); writer.write(Integer.toString(_newRows.size())); writer.write('\n'); + for (Row row : _newRows) { + row.save(writer, options); + writer.write('\n'); + } + writer.write("oldRowCount="); writer.write(Integer.toString(_oldRows.size())); writer.write('\n'); + for (Row row : _oldRows) { + row.save(writer, options); + writer.write('\n'); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + List oldColumns = null; + List newColumns = null; + + List oldRows = null; + List newRows = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + + if ("oldRowCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + + oldRows = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + oldRows.add(Row.load(line, pool)); + } + } + } else if ("newRowCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + + newRows = new ArrayList(count); + for (int i = 
0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + newRows.add(Row.load(line, pool)); + } + } + } else if ("oldColumnCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + + oldColumns = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + oldColumns.add(Column.load(line)); + } + } + } else if ("newColumnCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + + newColumns = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + newColumns.add(Column.load(line)); + } + } + } + } + + MassRowColumnChange change = new MassRowColumnChange(newColumns, newRows); + change._oldColumns = oldColumns; + change._oldRows = oldRows; + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/ReconChange.java b/main/src/com/google/refine/model/changes/ReconChange.java new file mode 100644 index 000000000..e44f0a78a --- /dev/null +++ b/main/src/com/google/refine/model/changes/ReconChange.java @@ -0,0 +1,174 @@ +/** + * + */ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.List; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.ReconStats; +import com.google.refine.model.recon.ReconConfig; +import com.google.refine.util.ParsingUtilities; +import com.google.refine.util.Pool; + +public class ReconChange extends MassCellChange { + final protected ReconConfig _newReconConfig; + protected ReconStats _newReconStats; + + protected ReconConfig _oldReconConfig; + protected ReconStats _oldReconStats; + + public ReconChange( + List cellChanges, + String commonColumnName, + ReconConfig newReconConfig, + ReconStats newReconStats // can be null + ) { + 
super(cellChanges, commonColumnName, false); + _newReconConfig = newReconConfig; + _newReconStats = newReconStats; + } + + public ReconChange( + CellChange[] cellChanges, + String commonColumnName, + ReconConfig newReconConfig, + ReconStats newReconStats // can be null + ) { + super(cellChanges, commonColumnName, false); + _newReconConfig = newReconConfig; + _newReconStats = newReconStats; + } + + public ReconChange( + CellChange cellChange, + String commonColumnName, + ReconConfig newReconConfig, + ReconStats newReconStats // can be null + ) { + super(cellChange, commonColumnName, false); + _newReconConfig = newReconConfig; + _newReconStats = newReconStats; + } + + @Override + public void apply(Project project) { + synchronized (project) { + super.apply(project); + + Column column = project.columnModel.getColumnByName(_commonColumnName); + + if (_newReconStats == null) { + _newReconStats = ReconStats.create(project, column.getCellIndex()); + } + + _oldReconConfig = column.getReconConfig(); + _oldReconStats = column.getReconStats(); + + column.setReconConfig(_newReconConfig); + column.setReconStats(_newReconStats); + + column.clearPrecomputes(); + } + } + + @Override + public void revert(Project project) { + synchronized (project) { + super.revert(project); + + Column column = project.columnModel.getColumnByName(_commonColumnName); + column.setReconConfig(_oldReconConfig); + column.setReconStats(_oldReconStats); + + column.clearPrecomputes(); + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("newReconConfig="); + if (_newReconConfig != null) { + _newReconConfig.save(writer); + } + writer.write('\n'); + + writer.write("newReconStats="); + if (_newReconStats != null) { + _newReconStats.save(writer); + } + writer.write('\n'); + + writer.write("oldReconConfig="); + if (_oldReconConfig != null) { + _oldReconConfig.save(writer); + } + writer.write('\n'); + + writer.write("oldReconStats="); + if (_oldReconStats != null) { 
+ _oldReconStats.save(writer); + } + writer.write('\n'); + + super.save(writer, options); + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + ReconConfig newReconConfig = null; + ReconStats newReconStats = null; + ReconConfig oldReconConfig = null; + ReconStats oldReconStats = null; + + String commonColumnName = null; + CellChange[] cellChanges = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("newReconConfig".equals(field)) { + if (value.length() > 0) { + newReconConfig = ReconConfig.reconstruct(ParsingUtilities.evaluateJsonStringToObject(value)); + } + } else if ("newReconStats".equals(field)) { + if (value.length() > 0) { + newReconStats = ReconStats.load(ParsingUtilities.evaluateJsonStringToObject(value)); + } + } else if ("oldReconConfig".equals(field)) { + if (value.length() > 0) { + oldReconConfig = ReconConfig.reconstruct(ParsingUtilities.evaluateJsonStringToObject(value)); + } + } else if ("oldReconStats".equals(field)) { + if (value.length() > 0) { + oldReconStats = ReconStats.load(ParsingUtilities.evaluateJsonStringToObject(value)); + } + } else if ("commonColumnName".equals(field)) { + commonColumnName = value; + } else if ("cellChangeCount".equals(field)) { + int cellChangeCount = Integer.parseInt(value); + + cellChanges = new CellChange[cellChangeCount]; + for (int i = 0; i < cellChangeCount; i++) { + cellChanges[i] = CellChange.load(reader, pool); + } + } + } + + ReconChange change = new ReconChange( + cellChanges, commonColumnName, newReconConfig, newReconStats); + + change._oldReconConfig = oldReconConfig; + change._oldReconStats = oldReconStats; + + return change; + } +} \ No newline at end of file diff --git a/main/src/com/google/refine/model/changes/RowFlagChange.java 
b/main/src/com/google/refine/model/changes/RowFlagChange.java new file mode 100644 index 000000000..abc655eb4 --- /dev/null +++ b/main/src/com/google/refine/model/changes/RowFlagChange.java @@ -0,0 +1,69 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.Pool; + +public class RowFlagChange implements Change { + final int rowIndex; + final boolean newFlagged; + Boolean oldFlagged = null; + + public RowFlagChange(int rowIndex, boolean newFlagged) { + this.rowIndex = rowIndex; + this.newFlagged = newFlagged; + } + + public void apply(Project project) { + Row row = project.rows.get(rowIndex); + if (oldFlagged == null) { + oldFlagged = row.flagged; + } + row.flagged = newFlagged; + } + + public void revert(Project project) { + Row row = project.rows.get(rowIndex); + + row.flagged = oldFlagged; + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("row="); writer.write(Integer.toString(rowIndex)); writer.write('\n'); + writer.write("newFlagged="); writer.write(Boolean.toString(newFlagged)); writer.write('\n'); + writer.write("oldFlagged="); writer.write(Boolean.toString(oldFlagged)); writer.write('\n'); + writer.write("/ec/\n"); // end of change marker + } + + static public RowFlagChange load(LineNumberReader reader, Pool pool) throws Exception { + int row = -1; + boolean oldFlagged = false; + boolean newFlagged = false; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("row".equals(field)) { + row = Integer.parseInt(value); + } else if ("oldFlagged".equals(field)) { + oldFlagged = 
Boolean.parseBoolean(value); + } else if ("newFlagged".equals(field)) { + oldFlagged = Boolean.parseBoolean(value); + } + } + + RowFlagChange change = new RowFlagChange(row, newFlagged); + change.oldFlagged = oldFlagged; + + return change; + } +} \ No newline at end of file diff --git a/main/src/com/google/refine/model/changes/RowRemovalChange.java b/main/src/com/google/refine/model/changes/RowRemovalChange.java new file mode 100644 index 000000000..107aad229 --- /dev/null +++ b/main/src/com/google/refine/model/changes/RowRemovalChange.java @@ -0,0 +1,109 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.Pool; + +public class RowRemovalChange implements Change { + final protected List _rowIndices; + protected List _rows; + + public RowRemovalChange(List rowIndices) { + _rowIndices = rowIndices; + } + + public void apply(Project project) { + synchronized (project) { + int count = _rowIndices.size(); + + _rows = new ArrayList(count); + + int offset = 0; + for (int i = 0; i < count; i++) { + int index = _rowIndices.get(i); + + Row row = project.rows.remove(index + offset); + _rows.add(row); + + offset--; + } + + project.update(); + } + } + + public void revert(Project project) { + synchronized (project) { + int count = _rowIndices.size(); + + for (int i = 0; i < count; i++) { + int index = _rowIndices.get(i); + Row row = _rows.get(i); + + project.rows.add(index, row); + } + + project.update(); + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n'); + for (Integer index : _rowIndices) { + writer.write(index.toString()); + 
writer.write('\n'); + } + writer.write("rowCount="); writer.write(Integer.toString(_rows.size())); writer.write('\n'); + for (Row row : _rows) { + row.save(writer, options); + writer.write('\n'); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + List rowIndices = null; + List rows = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + + if ("rowIndexCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + + rowIndices = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + rowIndices.add(Integer.parseInt(line)); + } + } + } else if ("rowCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + + rows = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + rows.add(Row.load(line, pool)); + } + } + } + } + + RowRemovalChange change = new RowRemovalChange(rowIndices); + change._rows = rows; + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/RowReorderChange.java b/main/src/com/google/refine/model/changes/RowReorderChange.java new file mode 100644 index 000000000..2b340ad58 --- /dev/null +++ b/main/src/com/google/refine/model/changes/RowReorderChange.java @@ -0,0 +1,94 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.Pool; + +public class RowReorderChange implements Change { + final protected List _rowIndices; + + public RowReorderChange(List 
rowIndices) { + _rowIndices = rowIndices; + } + + public void apply(Project project) { + synchronized (project) { + List oldRows = project.rows; + List newRows = new ArrayList(oldRows.size()); + + for (Integer oldIndex : _rowIndices) { + newRows.add(oldRows.get(oldIndex)); + } + + project.rows.clear(); + project.rows.addAll(newRows); + project.update(); + } + } + + public void revert(Project project) { + synchronized (project) { + int count = project.rows.size(); + + List newRows = project.rows; + List oldRows = new ArrayList(count); + + for (int r = 0; r < count; r++) { + oldRows.add(null); + } + + for (int newIndex = 0; newIndex < count; newIndex++) { + int oldIndex = _rowIndices.get(newIndex); + Row row = newRows.get(newIndex); + oldRows.set(oldIndex, row); + } + + project.rows.clear(); + project.rows.addAll(oldRows); + project.update(); + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("rowIndexCount="); writer.write(Integer.toString(_rowIndices.size())); writer.write('\n'); + for (Integer index : _rowIndices) { + writer.write(index.toString()); + writer.write('\n'); + } + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + List rowIndices = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + + if ("rowIndexCount".equals(field)) { + int count = Integer.parseInt(line.substring(equal + 1)); + + rowIndices = new ArrayList(count); + for (int i = 0; i < count; i++) { + line = reader.readLine(); + if (line != null) { + rowIndices.add(Integer.parseInt(line)); + } + } + } + } + + RowReorderChange change = new RowReorderChange(rowIndices); + + return change; + } +} diff --git a/main/src/com/google/refine/model/changes/RowStarChange.java b/main/src/com/google/refine/model/changes/RowStarChange.java new file 
mode 100644 index 000000000..56049bdd1 --- /dev/null +++ b/main/src/com/google/refine/model/changes/RowStarChange.java @@ -0,0 +1,69 @@ +package com.google.refine.model.changes; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.Properties; + +import com.google.refine.history.Change; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.util.Pool; + +public class RowStarChange implements Change { + final int rowIndex; + final boolean newStarred; + Boolean oldStarred = null; + + public RowStarChange(int rowIndex, boolean newStarred) { + this.rowIndex = rowIndex; + this.newStarred = newStarred; + } + + public void apply(Project project) { + Row row = project.rows.get(rowIndex); + if (oldStarred == null) { + oldStarred = row.starred; + } + row.starred = newStarred; + } + + public void revert(Project project) { + Row row = project.rows.get(rowIndex); + + row.starred = oldStarred; + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("row="); writer.write(Integer.toString(rowIndex)); writer.write('\n'); + writer.write("newStarred="); writer.write(Boolean.toString(newStarred)); writer.write('\n'); + writer.write("oldStarred="); writer.write(Boolean.toString(oldStarred)); writer.write('\n'); + writer.write("/ec/\n"); // end of change marker + } + + static public RowStarChange load(LineNumberReader reader, Pool pool) throws Exception { + int row = -1; + boolean oldStarred = false; + boolean newStarred = false; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("row".equals(field)) { + row = Integer.parseInt(value); + } else if ("oldStarred".equals(field)) { + oldStarred = Boolean.parseBoolean(value); + } else if ("newStarred".equals(field)) { + oldStarred = 
Boolean.parseBoolean(value); + } + } + + RowStarChange change = new RowStarChange(row, newStarred); + change.oldStarred = oldStarred; + + return change; + } +} \ No newline at end of file diff --git a/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java b/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java new file mode 100644 index 000000000..a16962381 --- /dev/null +++ b/main/src/com/google/refine/model/recon/DataExtensionReconConfig.java @@ -0,0 +1,63 @@ +package com.google.refine.model.recon; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Row; +import com.google.refine.protograph.FreebaseType; + +public class DataExtensionReconConfig extends StrictReconConfig { + final public FreebaseType type; + + private final static String WARN = "Not implemented"; + + static public ReconConfig reconstruct(JSONObject obj) throws Exception { + JSONObject type = obj.getJSONObject("type"); + + return new DataExtensionReconConfig( + new FreebaseType( + type.getString("id"), + type.getString("name") + ) + ); + } + + public DataExtensionReconConfig(FreebaseType type) { + this.type = type; + } + + @Override + public ReconJob createJob(Project project, int rowIndex, Row row, + String columnName, Cell cell) { + throw new RuntimeException(WARN); + } + + @Override + public int getBatchSize() { + throw new RuntimeException(WARN); + } + + public void write(JSONWriter writer, Properties options) throws JSONException { + writer.object(); + writer.key("mode"); writer.value("extend"); + writer.key("type"); type.write(writer, options); + writer.endObject(); + } + + @Override + public List batchRecon(List jobs, long historyEntryID) { + throw new RuntimeException(WARN); + } + + @Override + public String 
getBriefDescription(Project project, String columnName) { + throw new RuntimeException(WARN); + } +} diff --git a/main/src/com/google/refine/model/recon/GuidBasedReconConfig.java b/main/src/com/google/refine/model/recon/GuidBasedReconConfig.java new file mode 100644 index 000000000..698482f27 --- /dev/null +++ b/main/src/com/google/refine/model/recon/GuidBasedReconConfig.java @@ -0,0 +1,174 @@ +package com.google.refine.model.recon; + +import java.io.InputStream; +import java.io.StringWriter; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.util.ParsingUtilities; + +public class GuidBasedReconConfig extends StrictReconConfig { + static public ReconConfig reconstruct(JSONObject obj) throws Exception { + return new GuidBasedReconConfig(); + } + + public GuidBasedReconConfig() { + } + + static protected class GuidBasedReconJob extends ReconJob { + String guid; + + public int getKey() { + return guid.hashCode(); + } + } + + @Override + public ReconJob createJob(Project project, int rowIndex, Row row, + String columnName, Cell cell) { + + GuidBasedReconJob job = new GuidBasedReconJob(); + String s = cell.value.toString(); + + if (s.startsWith("/guid/")) { + s = "#" + s.substring(6); + } else if (!s.startsWith("#")) { + s = "#" + s; + } + + job.guid = s; + + return job; + } + + @Override + public int getBatchSize() { + return 10; + } + + @Override + public String getBriefDescription(Project project, String columnName) { + return "Reconcile 
cells in column " + columnName + " as Freebase IDs"; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("mode"); writer.value("strict"); + writer.key("match"); writer.value("id"); + writer.endObject(); + } + + @Override + public List batchRecon(List jobs, long historyEntryID) { + List recons = new ArrayList(jobs.size()); + Map guidToRecon = new HashMap(); + + try { + String query = null; + { + StringWriter stringWriter = new StringWriter(); + JSONWriter jsonWriter = new JSONWriter(stringWriter); + + jsonWriter.object(); + jsonWriter.key("query"); + jsonWriter.array(); + jsonWriter.object(); + + jsonWriter.key("id"); jsonWriter.value(null); + jsonWriter.key("name"); jsonWriter.value(null); + jsonWriter.key("guid"); jsonWriter.value(null); + jsonWriter.key("type"); jsonWriter.array(); jsonWriter.endArray(); + + jsonWriter.key("guid|="); + jsonWriter.array(); + for (ReconJob job : jobs) { + jsonWriter.value(((GuidBasedReconJob) job).guid); + } + jsonWriter.endArray(); + + jsonWriter.endObject(); + jsonWriter.endArray(); + jsonWriter.endObject(); + + query = stringWriter.toString(); + } + + StringBuffer sb = new StringBuffer(1024); + sb.append(s_mqlreadService); + sb.append("?query="); + sb.append(ParsingUtilities.encode(query)); + + URL url = new URL(sb.toString()); + URLConnection connection = url.openConnection(); + connection.setConnectTimeout(5000); + connection.connect(); + + InputStream is = connection.getInputStream(); + try { + String s = ParsingUtilities.inputStreamToString(is); + JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); + JSONArray results = o.getJSONArray("result"); + int count = results.length(); + + for (int i = 0; i < count; i++) { + JSONObject result = results.getJSONObject(i); + + String guid = result.getString("guid"); + + JSONArray types = result.getJSONArray("type"); + String[] typeIDs = new String[types.length()]; + for (int j = 0; j < typeIDs.length; 
j++) { + typeIDs[j] = types.getString(j); + } + + ReconCandidate candidate = new ReconCandidate( + result.getString("id"), + result.getString("name"), + typeIDs, + 100 + ); + + Recon recon = Recon.makeFreebaseRecon(historyEntryID); + recon.addCandidate(candidate); + recon.service = "mql"; + recon.judgment = Judgment.Matched; + recon.judgmentAction = "auto"; + recon.match = candidate; + recon.matchRank = 0; + + guidToRecon.put(guid, recon); + } + } finally { + is.close(); + } + } catch (Exception e) { + e.printStackTrace(); + } + + for (int i = 0; i < jobs.size(); i++) { + String guid = ((GuidBasedReconJob) jobs.get(i)).guid; + Recon recon = guidToRecon.get(guid); + recons.add(recon); + } + + return recons; + } +} diff --git a/main/src/com/google/refine/model/recon/IdBasedReconConfig.java b/main/src/com/google/refine/model/recon/IdBasedReconConfig.java new file mode 100644 index 000000000..cc25ada83 --- /dev/null +++ b/main/src/com/google/refine/model/recon/IdBasedReconConfig.java @@ -0,0 +1,179 @@ +package com.google.refine.model.recon; + +import java.io.InputStream; +import java.io.StringWriter; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.util.ParsingUtilities; + +public class IdBasedReconConfig extends StrictReconConfig { + static public ReconConfig reconstruct(JSONObject obj) throws Exception { + return new IdBasedReconConfig(); + } + + public IdBasedReconConfig() { + } + + static protected class IdBasedReconJob extends ReconJob { 
+ String id; + + public int getKey() { + return id.hashCode(); + } + } + + @Override + public ReconJob createJob(Project project, int rowIndex, Row row, + String columnName, Cell cell) { + + IdBasedReconJob job = new IdBasedReconJob(); + String s = cell.value.toString(); + + if (!s.startsWith("/")) { + if (s.startsWith("92")) { + s = "/guid/" + s; + } else if (!s.contains("/")){ + s = "/en/" + s; + } else { + s = "/" + s; + } + } + + job.id = s; + + return job; + } + + @Override + public int getBatchSize() { + return 10; + } + + @Override + public String getBriefDescription(Project project, String columnName) { + return "Reconcile cells in column " + columnName + " as Freebase IDs"; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("mode"); writer.value("strict"); + writer.key("match"); writer.value("id"); + writer.endObject(); + } + + @Override + public List batchRecon(List jobs, long historyEntryID) { + List recons = new ArrayList(jobs.size()); + Map idToRecon = new HashMap(); + + try { + String query = null; + { + StringWriter stringWriter = new StringWriter(); + JSONWriter jsonWriter = new JSONWriter(stringWriter); + + jsonWriter.object(); + jsonWriter.key("query"); + jsonWriter.array(); + jsonWriter.object(); + + jsonWriter.key("id"); jsonWriter.value(null); + jsonWriter.key("name"); jsonWriter.value(null); + jsonWriter.key("guid"); jsonWriter.value(null); + jsonWriter.key("type"); jsonWriter.array(); jsonWriter.endArray(); + + jsonWriter.key("id|="); + jsonWriter.array(); + for (ReconJob job : jobs) { + jsonWriter.value(((IdBasedReconJob) job).id); + } + jsonWriter.endArray(); + + jsonWriter.endObject(); + jsonWriter.endArray(); + jsonWriter.endObject(); + + query = stringWriter.toString(); + } + + StringBuffer sb = new StringBuffer(1024); + sb.append(s_mqlreadService); + sb.append("?query="); + sb.append(ParsingUtilities.encode(query)); + + URL url = new URL(sb.toString()); + 
URLConnection connection = url.openConnection(); + connection.setConnectTimeout(5000); + connection.connect(); + + InputStream is = connection.getInputStream(); + try { + String s = ParsingUtilities.inputStreamToString(is); + JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); + JSONArray results = o.getJSONArray("result"); + int count = results.length(); + + for (int i = 0; i < count; i++) { + JSONObject result = results.getJSONObject(i); + + String id = result.getString("id"); + + JSONArray types = result.getJSONArray("type"); + String[] typeIDs = new String[types.length()]; + for (int j = 0; j < typeIDs.length; j++) { + typeIDs[j] = types.getString(j); + } + + ReconCandidate candidate = new ReconCandidate( + id, + result.getString("name"), + typeIDs, + 100 + ); + + Recon recon = Recon.makeFreebaseRecon(historyEntryID); + recon.addCandidate(candidate); + recon.service = "mql"; + recon.judgment = Judgment.Matched; + recon.judgmentAction = "auto"; + recon.match = candidate; + recon.matchRank = 0; + + idToRecon.put(id, recon); + } + } finally { + is.close(); + } + } catch (Exception e) { + e.printStackTrace(); + } + + for (int i = 0; i < jobs.size(); i++) { + String id = ((IdBasedReconJob) jobs.get(i)).id; + Recon recon = idToRecon.get(id); + recons.add(recon); + } + + return recons; + } + +} diff --git a/main/src/com/google/refine/model/recon/KeyBasedReconConfig.java b/main/src/com/google/refine/model/recon/KeyBasedReconConfig.java new file mode 100644 index 000000000..53be224bc --- /dev/null +++ b/main/src/com/google/refine/model/recon/KeyBasedReconConfig.java @@ -0,0 +1,193 @@ +package com.google.refine.model.recon; + +import java.io.InputStream; +import java.io.StringWriter; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; 
+import org.json.JSONWriter; + +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.protograph.FreebaseTopic; +import com.google.refine.util.ParsingUtilities; + +public class KeyBasedReconConfig extends StrictReconConfig { + final public FreebaseTopic namespace; + + static public ReconConfig reconstruct(JSONObject obj) throws Exception { + JSONObject ns = obj.getJSONObject("namespace"); + + return new KeyBasedReconConfig( + new FreebaseTopic( + ns.getString("id"), + ns.getString("name") + ) + ); + } + + public KeyBasedReconConfig(FreebaseTopic namespace) { + this.namespace = namespace; + } + + static protected class KeyBasedReconJob extends ReconJob { + String key; + + public int getKey() { + return key.hashCode(); + } + } + + @Override + public ReconJob createJob(Project project, int rowIndex, Row row, + String columnName, Cell cell) { + + KeyBasedReconJob job = new KeyBasedReconJob(); + + job.key = cell.value.toString().replace(' ', '_'); + + return job; + } + + @Override + public int getBatchSize() { + return 10; + } + + @Override + public String getBriefDescription(Project project, String columnName) { + return "Reconcile cells in column " + columnName + " to topics with keys in namespace " + namespace.id; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("mode"); writer.value("strict"); + writer.key("match"); writer.value("key"); + writer.key("namespace"); namespace.write(writer, options); + writer.endObject(); + } + + @Override + public List batchRecon(List jobs, long historyEntryID) { + List recons = new ArrayList(jobs.size()); + Map keyToRecon = new HashMap(); + + try { + String query = null; + { + StringWriter stringWriter = new StringWriter(); + JSONWriter 
jsonWriter = new JSONWriter(stringWriter); + + jsonWriter.object(); + jsonWriter.key("query"); + jsonWriter.array(); + jsonWriter.object(); + + jsonWriter.key("id"); jsonWriter.value(null); + jsonWriter.key("name"); jsonWriter.value(null); + jsonWriter.key("guid"); jsonWriter.value(null); + jsonWriter.key("type"); jsonWriter.array(); jsonWriter.endArray(); + + jsonWriter.key("key"); + jsonWriter.array(); + jsonWriter.object(); + + jsonWriter.key("namespace"); + jsonWriter.object(); + jsonWriter.key("id"); jsonWriter.value(namespace.id); + jsonWriter.endObject(); + + jsonWriter.key("value"); jsonWriter.value(null); + jsonWriter.key("value|="); + jsonWriter.array(); + for (ReconJob job : jobs) { + jsonWriter.value(((KeyBasedReconJob) job).key); + } + jsonWriter.endArray(); + + jsonWriter.endObject(); + jsonWriter.endArray(); + + jsonWriter.endObject(); + jsonWriter.endArray(); + jsonWriter.endObject(); + + query = stringWriter.toString(); + } + + StringBuffer sb = new StringBuffer(1024); + sb.append(s_mqlreadService); + sb.append("?query="); + sb.append(ParsingUtilities.encode(query)); + + URL url = new URL(sb.toString()); + URLConnection connection = url.openConnection(); + connection.setConnectTimeout(5000); + connection.connect(); + + InputStream is = connection.getInputStream(); + try { + String s = ParsingUtilities.inputStreamToString(is); + JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); + JSONArray results = o.getJSONArray("result"); + int count = results.length(); + + for (int i = 0; i < count; i++) { + JSONObject result = results.getJSONObject(i); + + String key = result.getJSONArray("key").getJSONObject(0).getString("value"); + + JSONArray types = result.getJSONArray("type"); + String[] typeIDs = new String[types.length()]; + for (int j = 0; j < typeIDs.length; j++) { + typeIDs[j] = types.getString(j); + } + + ReconCandidate candidate = new ReconCandidate( + result.getString("id"), + result.getString("name"), + typeIDs, + 100 + ); + + Recon 
recon = Recon.makeFreebaseRecon(historyEntryID); + recon.addCandidate(candidate); + recon.service = "mql"; + recon.judgment = Judgment.Matched; + recon.judgmentAction = "auto"; + recon.match = candidate; + recon.matchRank = 0; + + keyToRecon.put(key, recon); + } + } finally { + is.close(); + } + } catch (Exception e) { + e.printStackTrace(); + } + + for (int i = 0; i < jobs.size(); i++) { + String key = ((KeyBasedReconJob) jobs.get(i)).key; + Recon recon = keyToRecon.get(key); + recons.add(recon); + } + + return recons; + } + +} diff --git a/main/src/com/google/refine/model/recon/ReconConfig.java b/main/src/com/google/refine/model/recon/ReconConfig.java new file mode 100644 index 000000000..e489ee5a3 --- /dev/null +++ b/main/src/com/google/refine/model/recon/ReconConfig.java @@ -0,0 +1,54 @@ +package com.google.refine.model.recon; + +import java.io.Writer; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Row; + +abstract public class ReconConfig implements Jsonizable { + static public ReconConfig reconstruct(JSONObject obj) throws Exception { + String mode = obj.getString("mode"); + if ("standard-service".equals(mode) || + "heuristic".equals(mode) // legacy + ) { + return StandardReconConfig.reconstruct(obj); + } else if ("strict".equals(mode)) { + return StrictReconConfig.reconstruct(obj); + } else if ("extend".equals(mode)) { + return DataExtensionReconConfig.reconstruct(obj); + } + return null; + } + + abstract public int getBatchSize(); + + abstract public String getBriefDescription(Project project, String columnName); + + abstract public ReconJob createJob( + Project project, + int rowIndex, + Row row, + String columnName, + Cell cell + ); + + abstract public List 
batchRecon(List jobs, long historyEntryID); + + public void save(Writer writer) { + JSONWriter jsonWriter = new JSONWriter(writer); + try { + write(jsonWriter, new Properties()); + } catch (JSONException e) { + e.printStackTrace(); + } + } +} diff --git a/main/src/com/google/refine/model/recon/ReconJob.java b/main/src/com/google/refine/model/recon/ReconJob.java new file mode 100644 index 000000000..4dbeb5ad9 --- /dev/null +++ b/main/src/com/google/refine/model/recon/ReconJob.java @@ -0,0 +1,5 @@ +package com.google.refine.model.recon; + +abstract public class ReconJob { + abstract public int getKey(); +} diff --git a/main/src/com/google/refine/model/recon/StandardReconConfig.java b/main/src/com/google/refine/model/recon/StandardReconConfig.java new file mode 100644 index 000000000..259115e38 --- /dev/null +++ b/main/src/com/google/refine/model/recon/StandardReconConfig.java @@ -0,0 +1,413 @@ +package com.google.refine.model.recon; + +import java.io.DataOutputStream; +import java.io.InputStream; +import java.io.StringWriter; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.model.RecordModel.RowDependency; +import com.google.refine.protograph.FreebaseProperty; +import com.google.refine.util.ParsingUtilities; + +public class StandardReconConfig extends ReconConfig { + final static Logger 
logger = LoggerFactory.getLogger("gridworks-standard-recon"); + + static public class ColumnDetail { + final public String columnName; + final public FreebaseProperty property; + + public ColumnDetail(String columnName, FreebaseProperty property) { + this.columnName = columnName; + this.property = property; + } + } + + static public ReconConfig reconstruct(JSONObject obj) throws Exception { + List columnDetails = null; + if (obj.has("columnDetails")) { + JSONArray columnDetailsA = obj.getJSONArray("columnDetails"); + int l = columnDetailsA.length(); + + columnDetails = new ArrayList(l); + for (int i = 0; i < l; i++) { + JSONObject o = columnDetailsA.getJSONObject(i); + JSONObject p = o.getJSONObject("property"); + + columnDetails.add(new ColumnDetail( + o.getString("column"), + new FreebaseProperty( + p.getString("id"), + p.getString("name") + ) + )); + } + } else { + columnDetails = new ArrayList(); + } + + JSONObject t = obj.has("type") && !obj.isNull("type") ? obj.getJSONObject("type") : null; + + return new StandardReconConfig( + obj.getString("service"), + obj.has("identifierSpace") ? obj.getString("identifierSpace") : null, + obj.has("schemaSpace") ? obj.getString("schemaSpace") : null, + t == null ? null : t.getString("id"), + t == null ? null : (t.has("name") ? 
t.getString("name") : null), + obj.getBoolean("autoMatch"), + columnDetails + ); + } + + static protected class StandardReconJob extends ReconJob { + String text; + String code; + + public int getKey() { + return code.hashCode(); + } + } + + final public String service; + final public String identifierSpace; + final public String schemaSpace; + + final public String typeID; + final public String typeName; + final public boolean autoMatch; + final public List columnDetails; + + public StandardReconConfig( + String service, + String identifierSpace, + String schemaSpace, + + String typeID, + String typeName, + boolean autoMatch, + List columnDetails + ) { + this.service = service; + this.identifierSpace = identifierSpace; + this.schemaSpace = schemaSpace; + + this.typeID = typeID; + this.typeName = typeName; + this.autoMatch = autoMatch; + this.columnDetails = columnDetails; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("mode"); writer.value("standard-service"); + writer.key("service"); writer.value(service); + writer.key("identifierSpace"); writer.value(identifierSpace); + writer.key("schemaSpace"); writer.value(schemaSpace); + writer.key("type"); + if (typeID == null) { + writer.value(null); + } else { + writer.object(); + writer.key("id"); writer.value(typeID); + writer.key("name"); writer.value(typeName); + writer.endObject(); + } + writer.key("autoMatch"); writer.value(autoMatch); + writer.key("columnDetails"); + writer.array(); + for (ColumnDetail c : columnDetails) { + writer.object(); + writer.key("column"); writer.value(c.columnName); + writer.key("property"); c.property.write(writer, options); + writer.endObject(); + } + writer.endArray(); + writer.endObject(); + } + + @Override + public int getBatchSize() { + return 7; + } + + @Override + public String getBriefDescription(Project project, String columnName) { + return "Reconcile cells in column " + columnName + " to type " + 
typeID; + } + + @Override + public ReconJob createJob(Project project, int rowIndex, Row row, + String columnName, Cell cell) { + + StandardReconJob job = new StandardReconJob(); + + try { + StringWriter stringWriter = new StringWriter(); + JSONWriter jsonWriter = new JSONWriter(stringWriter); + + jsonWriter.object(); + jsonWriter.key("query"); jsonWriter.value(cell.value.toString()); + if (typeID != null) { + jsonWriter.key("type"); jsonWriter.value(typeID); + } + + if (columnDetails.size() > 0) { + jsonWriter.key("properties"); + jsonWriter.array(); + + for (ColumnDetail c : columnDetails) { + int detailCellIndex = project.columnModel.getColumnByName(c.columnName).getCellIndex(); + + Cell cell2 = row.getCell(detailCellIndex); + if (cell2 == null || !ExpressionUtils.isNonBlankData(cell2.value)) { + int cellIndex = project.columnModel.getColumnByName(columnName).getCellIndex(); + + RowDependency rd = project.recordModel.getRowDependency(rowIndex); + if (rd != null && rd.cellDependencies != null) { + int contextRowIndex = rd.cellDependencies[cellIndex].rowIndex; + if (contextRowIndex >= 0 && contextRowIndex < project.rows.size()) { + Row row2 = project.rows.get(contextRowIndex); + + cell2 = row2.getCell(detailCellIndex); + } + } + } + + if (cell2 != null && ExpressionUtils.isNonBlankData(cell2.value)) { + jsonWriter.object(); + + jsonWriter.key("pid"); jsonWriter.value(c.property.id); + jsonWriter.key("v"); + if (cell2.recon != null && cell2.recon.match != null) { + jsonWriter.object(); + jsonWriter.key("id"); jsonWriter.value(cell2.recon.match.id); + jsonWriter.key("name"); jsonWriter.value(cell2.recon.match.name); + jsonWriter.endObject(); + } else { + jsonWriter.value(cell2.value.toString()); + } + + jsonWriter.endObject(); + } + } + + jsonWriter.endArray(); + } + jsonWriter.endObject(); + + job.text = cell.value.toString(); + job.code = stringWriter.toString(); + } catch (JSONException e) { + // + } + return job; + } + + @Override + public List batchRecon(List 
jobs, long historyEntryID) { + List recons = new ArrayList(jobs.size()); + + StringWriter stringWriter = new StringWriter(); + + stringWriter.write("{"); + for (int i = 0; i < jobs.size(); i++) { + StandardReconJob job = (StandardReconJob) jobs.get(i); + if (i > 0) { + stringWriter.write(","); + } + stringWriter.write("\"q" + i + "\":"); + stringWriter.write(job.code); + } + stringWriter.write("}"); + String queriesString = stringWriter.toString(); + + try { + URL url = new URL(service); + URLConnection connection = url.openConnection(); + { + connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + connection.setConnectTimeout(30000); + connection.setDoOutput(true); + + DataOutputStream dos = new DataOutputStream(connection.getOutputStream()); + try { + String body = "queries=" + ParsingUtilities.encode(queriesString); + + dos.writeBytes(body); + } finally { + dos.flush(); + dos.close(); + } + + connection.connect(); + } + + InputStream is = connection.getInputStream(); + try { + String s = ParsingUtilities.inputStreamToString(is); + JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); + + for (int i = 0; i < jobs.size(); i++) { + StandardReconJob job = (StandardReconJob) jobs.get(i); + Recon recon = null; + + String text = job.text; + String key = "q" + i; + if (o.has(key)) { + JSONObject o2 = o.getJSONObject(key); + if (o2.has("result")) { + JSONArray results = o2.getJSONArray("result"); + + recon = createReconServiceResults(text, results, historyEntryID); + } + } + + if (recon == null) { + recon = new Recon(historyEntryID, identifierSpace, schemaSpace); + } + recon.service = service; + + recons.add(recon); + } + } finally { + is.close(); + } + } catch (Exception e) { + logger.error("Failed to batch recon with load:\n" + queriesString, e); + } + + while (recons.size() < jobs.size()) { + Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace); + recon.service = service; + recon.identifierSpace = 
identifierSpace; + recon.schemaSpace = schemaSpace; + + recons.add(recon); + } + + return recons; + } + + protected Recon createReconServiceResults(String text, JSONArray results, long historyEntryID) { + Recon recon = new Recon(historyEntryID, identifierSpace, schemaSpace); + try { + int length = results.length(); + int count = 0; + for (int i = 0; i < length && count < 3; i++) { + JSONObject result = results.getJSONObject(i); + if (!result.has("name")) { + continue; + } + + JSONArray types = result.getJSONArray("type"); + String[] typeIDs = new String[types.length()]; + for (int j = 0; j < typeIDs.length; j++) { + Object type = types.get(j); + typeIDs[j] = type instanceof String ? (String) type : + ((JSONObject) type).getString("id"); + } + + double score = result.getDouble("score"); + ReconCandidate candidate = new ReconCandidate( + result.getString("id"), + result.getString("name"), + typeIDs, + score + ); + + if (i == 0 && result.has("match") && result.getBoolean("match")) { + recon.match = candidate; + recon.matchRank = 0; + recon.judgment = Judgment.Matched; + recon.judgmentAction = "auto"; + } + + recon.addCandidate(candidate); + count++; + } + + if (count > 0) { + ReconCandidate candidate = recon.candidates.get(0); + + recon.setFeature(Recon.Feature_nameMatch, text.equalsIgnoreCase(candidate.name)); + recon.setFeature(Recon.Feature_nameLevenshtein, StringUtils.getLevenshteinDistance(text, candidate.name)); + recon.setFeature(Recon.Feature_nameWordDistance, wordDistance(text, candidate.name)); + + recon.setFeature(Recon.Feature_typeMatch, false); + if (this.typeID != null) { + for (String typeID : candidate.types) { + if (this.typeID.equals(typeID)) { + recon.setFeature(Recon.Feature_typeMatch, true); + break; + } + } + } + } + } catch (JSONException e) { + e.printStackTrace(); + } + return recon; + } + + static protected double wordDistance(String s1, String s2) { + Set words1 = breakWords(s1); + Set words2 = breakWords(s2); + return words1.size() >= 
words2.size() ? wordDistance(words1, words2) : wordDistance(words2, words1); + } + + static protected double wordDistance(Set longWords, Set shortWords) { + double common = 0; + for (String word : shortWords) { + if (longWords.contains(word)) { + common++; + } + } + return common / longWords.size(); + } + + static final protected Set s_stopWords = new HashSet(); + static { + s_stopWords.add("the"); + s_stopWords.add("a"); + s_stopWords.add("and"); + s_stopWords.add("of"); + s_stopWords.add("on"); + s_stopWords.add("in"); + s_stopWords.add("at"); + s_stopWords.add("by"); + } + + static protected Set breakWords(String s) { + String[] words = s.toLowerCase().split("\\s+"); + + Set set = new HashSet(words.length); + for (String word : words) { + if (!s_stopWords.contains(word)) { + set.add(word); + } + } + return set; + } +} diff --git a/main/src/com/google/refine/model/recon/StrictReconConfig.java b/main/src/com/google/refine/model/recon/StrictReconConfig.java new file mode 100644 index 000000000..9fcc79091 --- /dev/null +++ b/main/src/com/google/refine/model/recon/StrictReconConfig.java @@ -0,0 +1,19 @@ +package com.google.refine.model.recon; + +import org.json.JSONObject; + +abstract public class StrictReconConfig extends ReconConfig { + final static protected String s_mqlreadService = "http://api.freebase.com/api/service/mqlread"; + + static public ReconConfig reconstruct(JSONObject obj) throws Exception { + String match = obj.getString("match"); + if ("key".equals(match)) { + return KeyBasedReconConfig.reconstruct(obj); + } else if ("id".equals(match)) { + return IdBasedReconConfig.reconstruct(obj); + } else if ("guid".equals(match)) { + return GuidBasedReconConfig.reconstruct(obj); + } + return null; + } +} diff --git a/main/src/com/google/refine/oauth/Credentials.java b/main/src/com/google/refine/oauth/Credentials.java new file mode 100644 index 000000000..9d19d3d1f --- /dev/null +++ b/main/src/com/google/refine/oauth/Credentials.java @@ -0,0 +1,82 @@ +package 
com.google.refine.oauth; + +import javax.servlet.http.Cookie; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import oauth.signpost.OAuth; +import oauth.signpost.http.HttpParameters; + +import com.google.refine.util.CookiesUtilities; + +public class Credentials { + + private static final String TOKEN = "oauth_token"; + private static final String SECRET = "oauth_token_secret"; + + public enum Type { + REQUEST("request"), + ACCESS("access"); + + private final String postfix; + + Type(String postfix) { + this.postfix = postfix; + } + + public String getCookieName(Provider provider) { + if (provider == null) throw new RuntimeException("Provider can't be null"); + return provider.getHost() + "_" + postfix; + } + }; + + public static Credentials getCredentials(HttpServletRequest request, Provider provider, Type type) { + Cookie cookie = CookiesUtilities.getCookie(request, type.getCookieName(provider)); + return (cookie == null) ? null : makeCredentials(cookie.getValue(), provider); + } + + public static void setCredentials(HttpServletRequest request, HttpServletResponse response, Credentials credentials, Type type, int max_age) { + String name = type.getCookieName(credentials.getProvider()); + String value = credentials.toString(); + CookiesUtilities.setCookie(request, response, name, value, max_age); + } + + public static void deleteCredentials(HttpServletRequest request, HttpServletResponse response, Provider provider, Type type) { + CookiesUtilities.deleteCookie(request, response, type.getCookieName(provider)); + } + + public static Credentials makeCredentials(String str, Provider provider) { + HttpParameters p = OAuth.decodeForm(str); + return new Credentials(p.getFirst(TOKEN), p.getFirst(SECRET), provider); + } + + private Provider provider; + private String token; + private String secret; + + public Credentials(String token, String secret, Provider provider) { + this.token = token; + if (token == null) throw new 
RuntimeException("Could not find " + TOKEN + " in auth credentials"); + this.secret = secret; + if (secret == null) throw new RuntimeException("Could not find " + SECRET + " in auth credentials"); + this.provider = provider; + if (provider == null) throw new RuntimeException("Provider can't be null"); + } + + public String getToken() { + return token; + } + + public String getSecret() { + return secret; + } + + public Provider getProvider() { + return provider; + } + + public String toString() { + return TOKEN + "=" + OAuth.percentEncode(token) + "&" + SECRET + "=" + OAuth.percentEncode(secret); + } + +} diff --git a/main/src/com/google/refine/oauth/FreebaseProvider.java b/main/src/com/google/refine/oauth/FreebaseProvider.java new file mode 100644 index 000000000..44d2ceea2 --- /dev/null +++ b/main/src/com/google/refine/oauth/FreebaseProvider.java @@ -0,0 +1,21 @@ +package com.google.refine.oauth; + +public class FreebaseProvider extends Provider { + + public FreebaseProvider(String host) { + super(host); + } + + public String getRequestTokenServiceURL() { + return "https://" + host + "/api/oauth/request_token"; + } + + public String getAccessTokenServiceURL() { + return "https://" + host + "/api/oauth/access_token"; + } + + public String getUserAuthorizationURL() { + return "https://" + host + "/signin/app"; + } + +} diff --git a/main/src/com/google/refine/oauth/FreebaseTimeCommonsHttpOAuthConsumer.java b/main/src/com/google/refine/oauth/FreebaseTimeCommonsHttpOAuthConsumer.java new file mode 100644 index 000000000..c82e740d5 --- /dev/null +++ b/main/src/com/google/refine/oauth/FreebaseTimeCommonsHttpOAuthConsumer.java @@ -0,0 +1,69 @@ +package com.google.refine.oauth; + +import java.io.IOException; + +import oauth.signpost.commonshttp.CommonsHttpOAuthConsumer; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpGet; +import 
org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.params.BasicHttpParams; +import org.apache.http.params.HttpConnectionParams; +import org.apache.http.params.HttpParams; +import org.apache.http.util.EntityUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class FreebaseTimeCommonsHttpOAuthConsumer extends CommonsHttpOAuthConsumer { + + final static Logger logger = LoggerFactory.getLogger("oauth"); + + private static final long serialVersionUID = -4139931605235255279L; + + private static final int SOCKET_TIMEOUT = 3000; + private static final int CONNECTION_TIMEOUT = 3000; + + private static final String TIMER_URL = "http://gridworks-gadgets.freebaseapps.com/time"; + + public FreebaseTimeCommonsHttpOAuthConsumer(String consumerKey, String consumerSecret) { + super(consumerKey, consumerSecret); + } + + /** + * It might be that the user's computer's clock is not synchronized enough with the Freebase servers + * and this might result in Freebase thinking that it was under a replay attack. + * To avoid this problem we get the timestamp directly from acre that we know is synchronized. + * + * NOTE: this call is potentially vulnerable to a man-in-the-middle (MITM) attack, but the same + * could be said if we used an NTP client. 
+ */ + protected String generateTimestamp() { + + long time = -1; + + try { + HttpParams httpParams = new BasicHttpParams(); + HttpConnectionParams.setSoTimeout(httpParams, SOCKET_TIMEOUT); + HttpConnectionParams.setConnectionTimeout(httpParams, CONNECTION_TIMEOUT); + HttpClient httpClient = new DefaultHttpClient(httpParams); + HttpGet httpget = new HttpGet(TIMER_URL); + HttpResponse response = httpClient.execute(httpget); + HttpEntity entity = response.getEntity(); + if (entity != null) { + time = Long.parseLong(EntityUtils.toString(entity),10); + logger.debug("Got remote timestamp {}", time); + } + } catch (IOException e) { + logger.warn("Error obtaining the synchronized remote timestamp, defaulting to the local one",e); + } + + if (time == -1) { + time = System.currentTimeMillis(); + } + + return Long.toString(time / 1000L); + } + +} diff --git a/main/src/com/google/refine/oauth/OAuthUtilities.java b/main/src/com/google/refine/oauth/OAuthUtilities.java new file mode 100644 index 000000000..1d12940f8 --- /dev/null +++ b/main/src/com/google/refine/oauth/OAuthUtilities.java @@ -0,0 +1,65 @@ +package com.google.refine.oauth; + +import java.util.HashMap; +import java.util.Map; + +import javax.servlet.http.HttpServletRequest; + +import oauth.signpost.OAuthConsumer; +import oauth.signpost.OAuthProvider; +import oauth.signpost.commonshttp.CommonsHttpOAuthProvider; +import oauth.signpost.http.HttpParameters; + +import com.google.refine.util.FreebaseUtils; + +public class OAuthUtilities { + + static final private Map providers = new HashMap(); + static final private Map infos = new HashMap(); + + static private final String[] FREEBASE_OAUTH_INFO = { "#9202a8c04000641f80000000150979b7" , "8ded7babfad2f94f4c77e39bbd6c90f31939999b"}; + + static { + Provider freebase = new FreebaseProvider(FreebaseUtils.FREEBASE_HOST); + providers.put(freebase.getHost(), freebase); + + infos.put(freebase.getHost(), FREEBASE_OAUTH_INFO); + } + + public static Provider getProvider(String name) 
{ + return (name == null) ? null : providers.get(name); + } + + public static Provider getProvider(HttpServletRequest request) { + String path = request.getPathInfo().substring(1); + int slash = path.lastIndexOf('/'); + String provider_str = path.substring(slash + 1); + Provider provider = getProvider(provider_str); + if (provider == null) throw new RuntimeException("Can't find OAuth provider '" + provider_str + "'"); + return provider; + } + + public static OAuthConsumer getConsumer(Provider provider) { + if (provider == null) throw new RuntimeException("Provider can't be null"); + String[] consumer_info = infos.get(provider.getHost()); + if (consumer_info == null) throw new RuntimeException("Can't find secrets for provider '" + provider.getHost() + "'"); + OAuthConsumer oauthConsumer = new FreebaseTimeCommonsHttpOAuthConsumer(consumer_info[0],consumer_info[1]); + HttpParameters params = new HttpParameters(); + params.put("realm", provider.getHost()); + oauthConsumer.setAdditionalParameters(params); + return oauthConsumer; + } + + public static OAuthConsumer getConsumer(Credentials credentials, Provider provider) { + OAuthConsumer consumer = getConsumer(provider); + if (credentials != null) { + consumer.setTokenWithSecret(credentials.getToken(), credentials.getSecret()); + } + return consumer; + } + + public static OAuthProvider getOAuthProvider(Provider p) { + return new CommonsHttpOAuthProvider(p.getRequestTokenServiceURL(), p.getAccessTokenServiceURL(), p.getUserAuthorizationURL()); + } + +} diff --git a/main/src/com/google/refine/oauth/Provider.java b/main/src/com/google/refine/oauth/Provider.java new file mode 100644 index 000000000..02d0f12f2 --- /dev/null +++ b/main/src/com/google/refine/oauth/Provider.java @@ -0,0 +1,19 @@ +package com.google.refine.oauth; + + +public abstract class Provider { + + protected String host; + + public Provider(String host) { + this.host = host; + } + + public String getHost() { + return host; + } + + abstract public String 
getRequestTokenServiceURL(); + abstract public String getAccessTokenServiceURL(); + abstract public String getUserAuthorizationURL(); +} diff --git a/main/src/com/google/refine/operations/EngineDependentMassCellOperation.java b/main/src/com/google/refine/operations/EngineDependentMassCellOperation.java new file mode 100644 index 000000000..63315c03b --- /dev/null +++ b/main/src/com/google/refine/operations/EngineDependentMassCellOperation.java @@ -0,0 +1,59 @@ +package com.google.refine.operations; + +import java.util.ArrayList; +import java.util.List; + +import org.json.JSONObject; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.changes.CellChange; +import com.google.refine.model.changes.MassCellChange; + +abstract public class EngineDependentMassCellOperation extends EngineDependentOperation { + final protected String _columnName; + final protected boolean _updateRowContextDependencies; + + protected EngineDependentMassCellOperation( + JSONObject engineConfig, String columnName, boolean updateRowContextDependencies) { + super(engineConfig); + _columnName = columnName; + _updateRowContextDependencies = updateRowContextDependencies; + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Engine engine = createEngine(project); + + Column column = project.columnModel.getColumnByName(_columnName); + if (column == null) { + throw new Exception("No column named " + _columnName); + } + + List cellChanges = new ArrayList(project.rows.size()); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + try { + filteredRows.accept(project, createRowVisitor(project, cellChanges, historyEntryID)); + } catch (Exception e) { + 
e.printStackTrace(); + } + + String description = createDescription(column, cellChanges); + + return new HistoryEntry( + historyEntryID, project, description, this, createChange(project, column, cellChanges)); + } + + protected Change createChange(Project project, Column column, List cellChanges) { + return new MassCellChange( + cellChanges, column.getName(), _updateRowContextDependencies); + } + + abstract protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception; + abstract protected String createDescription(Column column, List cellChanges); +} diff --git a/main/src/com/google/refine/operations/EngineDependentOperation.java b/main/src/com/google/refine/operations/EngineDependentOperation.java new file mode 100644 index 000000000..b3764e657 --- /dev/null +++ b/main/src/com/google/refine/operations/EngineDependentOperation.java @@ -0,0 +1,38 @@ +package com.google.refine.operations; + +import org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.browsing.Engine; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.util.ParsingUtilities; + +abstract public class EngineDependentOperation extends AbstractOperation { + final private String _engineConfigString; + + transient protected JSONObject _engineConfig; + + protected EngineDependentOperation(JSONObject engineConfig) { + _engineConfig = engineConfig; + _engineConfigString = engineConfig == null || engineConfig.length() == 0 + ? 
null : engineConfig.toString(); + } + + protected Engine createEngine(Project project) throws Exception { + Engine engine = new Engine(project); + engine.initializeFromJSON(getEngineConfig()); + return engine; + } + + protected JSONObject getEngineConfig() { + if (_engineConfig == null && _engineConfigString != null) { + try { + _engineConfig = ParsingUtilities.evaluateJsonStringToObject(_engineConfigString); + } catch (JSONException e) { + // ignore + } + } + return _engineConfig; + } +} diff --git a/main/src/com/google/refine/operations/OnError.java b/main/src/com/google/refine/operations/OnError.java new file mode 100644 index 000000000..407ac611c --- /dev/null +++ b/main/src/com/google/refine/operations/OnError.java @@ -0,0 +1,10 @@ +/** + * + */ +package com.google.refine.operations; + +public enum OnError { + KeepOriginal, + SetToBlank, + StoreError +} \ No newline at end of file diff --git a/main/src/com/google/refine/operations/OperationRegistry.java b/main/src/com/google/refine/operations/OperationRegistry.java new file mode 100644 index 000000000..6f8fcdd0b --- /dev/null +++ b/main/src/com/google/refine/operations/OperationRegistry.java @@ -0,0 +1,45 @@ +package com.google.refine.operations; + +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Map; + +import org.json.JSONObject; + +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; + +import edu.mit.simile.butterfly.ButterflyModule; + +public abstract class OperationRegistry { + + static final public Map> s_opNameToClass = new HashMap>(); + static final public Map, String> s_opClassToName = new HashMap, String>(); + + static public void registerOperation(ButterflyModule module, String name, Class klass) { + String key = module.getName() + "/" + name; + + s_opNameToClass.put(key, klass); + s_opClassToName.put(klass, key); + } + + static public AbstractOperation reconstruct(Project project, JSONObject obj) { + try { + String op = 
obj.getString("op"); + if (!op.contains("/")) { + op = "core/" + op; // backward compatible + } + + Class klass = OperationRegistry.s_opNameToClass.get(op); + if (klass != null) { + Method reconstruct = klass.getMethod("reconstruct", Project.class, JSONObject.class); + if (reconstruct != null) { + return (AbstractOperation) reconstruct.invoke(null, project, obj); + } + } + } catch (Exception e) { + e.printStackTrace(); + } + return null; + } +} diff --git a/main/src/com/google/refine/operations/SaveProtographOperation.java b/main/src/com/google/refine/operations/SaveProtographOperation.java new file mode 100644 index 000000000..3a52fa82c --- /dev/null +++ b/main/src/com/google/refine/operations/SaveProtographOperation.java @@ -0,0 +1,122 @@ +package com.google.refine.operations; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.Writer; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.protograph.Protograph; +import com.google.refine.util.ParsingUtilities; +import com.google.refine.util.Pool; + +public class SaveProtographOperation extends AbstractOperation { + final protected Protograph _protograph; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + return new SaveProtographOperation( + Protograph.reconstruct(obj.getJSONObject("protograph")) + ); + } + + public SaveProtographOperation(Protograph protograph) { + _protograph = protograph; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value("Save protograph"); + 
writer.key("protograph"); _protograph.write(writer, options); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Save schema skeleton"; + } + + @Override + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + String description = "Save schema-alignment protograph"; + + Change change = new ProtographChange(_protograph); + + return new HistoryEntry(historyEntryID, project, description, SaveProtographOperation.this, change); + } + + static public class ProtographChange implements Change { + final protected Protograph _newProtograph; + protected Protograph _oldProtograph; + + public ProtographChange(Protograph protograph) { + _newProtograph = protograph; + } + + public void apply(Project project) { + synchronized (project) { + _oldProtograph = (Protograph) project.overlayModels.get("freebaseProtograph"); + + project.overlayModels.put("freebaseProtograph", _newProtograph); + } + } + + public void revert(Project project) { + synchronized (project) { + if (_oldProtograph == null) { + project.overlayModels.remove("freebaseProtograph"); + } else { + project.overlayModels.put("freebaseProtograph", _oldProtograph); + } + } + } + + public void save(Writer writer, Properties options) throws IOException { + writer.write("newProtograph="); writeProtograph(_newProtograph, writer); writer.write('\n'); + writer.write("oldProtograph="); writeProtograph(_oldProtograph, writer); writer.write('\n'); + writer.write("/ec/\n"); // end of change marker + } + + static public Change load(LineNumberReader reader, Pool pool) throws Exception { + Protograph oldProtograph = null; + Protograph newProtograph = null; + + String line; + while ((line = reader.readLine()) != null && !"/ec/".equals(line)) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("oldProtograph".equals(field) && value.length() > 0) { + oldProtograph = 
Protograph.reconstruct(ParsingUtilities.evaluateJsonStringToObject(value)); + } else if ("newProtograph".equals(field) && value.length() > 0) { + newProtograph = Protograph.reconstruct(ParsingUtilities.evaluateJsonStringToObject(value)); + } + } + + ProtographChange change = new ProtographChange(newProtograph); + change._oldProtograph = oldProtograph; + + return change; + } + + static protected void writeProtograph(Protograph p, Writer writer) throws IOException { + if (p != null) { + JSONWriter jsonWriter = new JSONWriter(writer); + try { + p.write(jsonWriter, new Properties()); + } catch (JSONException e) { + e.printStackTrace(); + } + } + } + } +} diff --git a/main/src/com/google/refine/operations/cell/BlankDownOperation.java b/main/src/com/google/refine/operations/cell/BlankDownOperation.java new file mode 100644 index 000000000..0fe4f251b --- /dev/null +++ b/main/src/com/google/refine/operations/cell/BlankDownOperation.java @@ -0,0 +1,101 @@ +package com.google.refine.operations.cell; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.CellChange; +import com.google.refine.operations.EngineDependentMassCellOperation; +import com.google.refine.operations.OperationRegistry; + +public class BlankDownOperation extends EngineDependentMassCellOperation { + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new BlankDownOperation( + engineConfig, + obj.getString("columnName") + ); + } + + public BlankDownOperation( + 
JSONObject engineConfig, + String columnName + ) { + super(engineConfig, columnName, true); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnName"); writer.value(_columnName); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Blank down cells in column " + _columnName; + } + + protected String createDescription(Column column, + List cellChanges) { + + return "Blank down " + cellChanges.size() + + " cells in column " + column.getName(); + } + + protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + + return new RowVisitor() { + int cellIndex; + List cellChanges; + Cell previousCell; + + public RowVisitor init(int cellIndex, List cellChanges) { + this.cellIndex = cellIndex; + this.cellChanges = cellChanges; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Object value = row.getCellValue(cellIndex); + if (ExpressionUtils.isNonBlankData(value)) { + Cell cell = row.getCell(cellIndex); + if (previousCell != null && cell.value.equals(previousCell.value)) { + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, null); + cellChanges.add(cellChange); + } + previousCell = cell; + } else { + previousCell = null; + } + return false; + } + }.init(column.getCellIndex(), cellChanges); + } +} diff --git a/main/src/com/google/refine/operations/cell/FillDownOperation.java 
b/main/src/com/google/refine/operations/cell/FillDownOperation.java new file mode 100644 index 000000000..a606572e8 --- /dev/null +++ b/main/src/com/google/refine/operations/cell/FillDownOperation.java @@ -0,0 +1,97 @@ +package com.google.refine.operations.cell; + +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.CellChange; +import com.google.refine.operations.EngineDependentMassCellOperation; +import com.google.refine.operations.OperationRegistry; + +public class FillDownOperation extends EngineDependentMassCellOperation { + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new FillDownOperation( + engineConfig, + obj.getString("columnName") + ); + } + + public FillDownOperation( + JSONObject engineConfig, + String columnName + ) { + super(engineConfig, columnName, true); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnName"); writer.value(_columnName); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Fill down cells in column " + _columnName; + } + + protected String createDescription(Column column, + List cellChanges) { + + return "Fill down " + cellChanges.size() 
+ + " cells in column " + column.getName(); + } + + protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + + return new RowVisitor() { + int cellIndex; + List cellChanges; + Cell previousCell; + + public RowVisitor init(int cellIndex, List cellChanges) { + this.cellIndex = cellIndex; + this.cellChanges = cellChanges; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Object value = row.getCellValue(cellIndex); + if (ExpressionUtils.isNonBlankData(value)) { + previousCell = row.getCell(cellIndex); + } else if (previousCell != null) { + CellChange cellChange = new CellChange(rowIndex, cellIndex, row.getCell(cellIndex), previousCell); + cellChanges.add(cellChange); + } + return false; + } + }.init(column.getCellIndex(), cellChanges); + } +} diff --git a/main/src/com/google/refine/operations/cell/MassEditOperation.java b/main/src/com/google/refine/operations/cell/MassEditOperation.java new file mode 100644 index 000000000..6dcae2df3 --- /dev/null +++ b/main/src/com/google/refine/operations/cell/MassEditOperation.java @@ -0,0 +1,242 @@ +package com.google.refine.operations.cell; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.expr.MetaParser; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; 
+import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.CellChange; +import com.google.refine.operations.EngineDependentMassCellOperation; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.util.ParsingUtilities; + +public class MassEditOperation extends EngineDependentMassCellOperation { + final protected String _expression; + final protected List _edits; + + static public class Edit implements Jsonizable { + final public List from; + final public boolean fromBlank; + final public boolean fromError; + final public Serializable to; + + public Edit(List from, boolean fromBlank, boolean fromError, Serializable to) { + this.from = from; + this.fromBlank = fromBlank; + this.fromError = fromError; + this.to = to; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("fromBlank"); writer.value(fromBlank); + writer.key("fromError"); writer.value(fromError); + writer.key("from"); + writer.array(); + for (String s : from) { + writer.value(s); + } + writer.endArray(); + writer.key("to"); writer.value(to); + writer.endObject(); + } + } + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.has("engineConfig") && !obj.isNull("engineConfig") ? 
+ obj.getJSONObject("engineConfig") : null; + + return new MassEditOperation( + engineConfig, + obj.getString("columnName"), + obj.getString("expression"), + reconstructEdits(obj.getJSONArray("edits")) + ); + } + + static public List reconstructEdits(JSONArray editsA) throws Exception { + int editCount = editsA.length(); + + List edits = new ArrayList(editCount); + for (int i = 0; i < editCount; i++) { + JSONObject editO = editsA.getJSONObject(i); + + List from = null; + if (editO.has("from") && !editO.isNull("from")) { + JSONArray fromA = editO.getJSONArray("from"); + int fromCount = fromA.length(); + + from = new ArrayList(fromCount); + for (int j = 0; j < fromCount; j++) { + from.add(fromA.getString(j)); + } + } else { + from = new ArrayList(); + } + + boolean fromBlank = editO.has("fromBlank") && editO.getBoolean("fromBlank"); + boolean fromError = editO.has("fromError") && editO.getBoolean("fromError"); + + Serializable to = (Serializable) editO.get("to"); + if (editO.has("type")) { + String type = editO.getString("type"); + if ("date".equals(type)) { + to = ParsingUtilities.stringToDate((String) to); + } + } + + edits.add(new Edit(from, fromBlank, fromError, to)); + } + + return edits; + } + + public MassEditOperation(JSONObject engineConfig, String columnName, String expression, List edits) { + super(engineConfig, columnName, true); + _expression = expression; + _edits = edits; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnName"); writer.value(_columnName); + writer.key("expression"); writer.value(_expression); + writer.key("edits"); + writer.array(); + for (Edit edit : _edits) { + edit.write(writer, options); + } + writer.endArray(); + writer.endObject(); 
+ } + + protected String getBriefDescription(Project project) { + return "Mass edit cells in column " + _columnName; + } + + protected String createDescription(Column column, + List cellChanges) { + + return "Mass edit " + cellChanges.size() + + " cells in column " + column.getName(); + } + + protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + + Evaluable eval = MetaParser.parse(_expression); + Properties bindings = ExpressionUtils.createBindings(project); + + Map fromTo = new HashMap(); + Serializable fromBlankTo = null; + Serializable fromErrorTo = null; + + for (Edit edit : _edits) { + for (String s : edit.from) { + fromTo.put(s, edit.to); + } + + // the last edit wins + if (edit.fromBlank) { + fromBlankTo = edit.to; + } + if (edit.fromError) { + fromErrorTo = edit.to; + } + } + + return new RowVisitor() { + int cellIndex; + Properties bindings; + List cellChanges; + Evaluable eval; + + Map fromTo; + Serializable fromBlankTo; + Serializable fromErrorTo; + + public RowVisitor init( + int cellIndex, + Properties bindings, + List cellChanges, + Evaluable eval, + Map fromTo, + Serializable fromBlankTo, + Serializable fromErrorTo + ) { + this.cellIndex = cellIndex; + this.bindings = bindings; + this.cellChanges = cellChanges; + this.eval = eval; + this.fromTo = fromTo; + this.fromBlankTo = fromBlankTo; + this.fromErrorTo = fromErrorTo; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(cellIndex); + Cell newCell = null; + + ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); + + Object v = eval.evaluate(bindings); + if (ExpressionUtils.isError(v)) { + if (fromErrorTo != null) { + newCell = new Cell(fromErrorTo, (cell 
!= null) ? cell.recon : null); + } + } else if (ExpressionUtils.isNonBlankData(v)) { + String from = v.toString(); + Serializable to = fromTo.get(from); + if (to != null) { + newCell = new Cell(to, (cell != null) ? cell.recon : null); + } + } else { + if (fromBlankTo != null) { + newCell = new Cell(fromBlankTo, (cell != null) ? cell.recon : null); + } + } + + if (newCell != null) { + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); + cellChanges.add(cellChange); + } + return false; + } + }.init(column.getCellIndex(), bindings, cellChanges, eval, fromTo, fromBlankTo, fromErrorTo); + } +} diff --git a/main/src/com/google/refine/operations/cell/MultiValuedCellJoinOperation.java b/main/src/com/google/refine/operations/cell/MultiValuedCellJoinOperation.java new file mode 100644 index 000000000..ac27d4ec7 --- /dev/null +++ b/main/src/com/google/refine/operations/cell/MultiValuedCellJoinOperation.java @@ -0,0 +1,130 @@ +package com.google.refine.operations.cell; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.MassRowChange; +import com.google.refine.operations.OperationRegistry; + +public class MultiValuedCellJoinOperation extends AbstractOperation { + final protected String _columnName; + final protected String _keyColumnName; + final protected String _separator; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + return new MultiValuedCellJoinOperation( + obj.getString("columnName"), + obj.getString("keyColumnName"), + 
obj.getString("separator") + ); + } + + public MultiValuedCellJoinOperation( + String columnName, + String keyColumnName, + String separator + ) { + _columnName = columnName; + _keyColumnName = keyColumnName; + _separator = separator; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("columnName"); writer.value(_columnName); + writer.key("keyColumnName"); writer.value(_keyColumnName); + writer.key("separator"); writer.value(_separator); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Join multi-valued cells in column " + _columnName; + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + if (column == null) { + throw new Exception("No column named " + _columnName); + } + int cellIndex = column.getCellIndex(); + + Column keyColumn = project.columnModel.getColumnByName(_keyColumnName); + if (keyColumn == null) { + throw new Exception("No key column named " + _keyColumnName); + } + int keyCellIndex = keyColumn.getCellIndex(); + + List newRows = new ArrayList(); + + int oldRowCount = project.rows.size(); + for (int r = 0; r < oldRowCount; r++) { + Row oldRow = project.rows.get(r); + + if (oldRow.isCellBlank(keyCellIndex)) { + newRows.add(oldRow.dup()); + continue; + } + + int r2 = r + 1; + while (r2 < oldRowCount && project.rows.get(r2).isCellBlank(keyCellIndex)) { + r2++; + } + + if (r2 == r + 1) { + newRows.add(oldRow.dup()); + continue; + } + + StringBuffer sb = new StringBuffer(); + for (int r3 = r; r3 < r2; r3++) { + Object value = project.rows.get(r3).getCellValue(cellIndex); + if (ExpressionUtils.isNonBlankData(value)) { + if (sb.length() > 0) { + 
sb.append(_separator); + } + sb.append(value.toString()); + } + } + + for (int r3 = r; r3 < r2; r3++) { + Row newRow = project.rows.get(r3).dup(); + if (r3 == r) { + newRow.setCell(cellIndex, new Cell(sb.toString(), null)); + } else { + newRow.setCell(cellIndex, null); + } + + if (!newRow.isEmpty()) { + newRows.add(newRow); + } + } + + r = r2 - 1; // r will be incremented by the for loop anyway + } + + return new HistoryEntry( + historyEntryID, + project, + getBriefDescription(null), + this, + new MassRowChange(newRows) + ); + } + +} diff --git a/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java new file mode 100644 index 000000000..f0baa20ce --- /dev/null +++ b/main/src/com/google/refine/operations/cell/MultiValuedCellSplitOperation.java @@ -0,0 +1,147 @@ +package com.google.refine.operations.cell; + + import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.MassRowChange; +import com.google.refine.operations.OperationRegistry; + +public class MultiValuedCellSplitOperation extends AbstractOperation { + final protected String _columnName; + final protected String _keyColumnName; + final protected String _separator; + final protected String _mode; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + return new MultiValuedCellSplitOperation( + obj.getString("columnName"), + obj.getString("keyColumnName"), + obj.getString("separator"), + obj.getString("mode") + ); 
+ } + + public MultiValuedCellSplitOperation( + String columnName, + String keyColumnName, + String separator, + String mode + ) { + _columnName = columnName; + _keyColumnName = keyColumnName; + _separator = separator; + _mode = mode; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value("Split multi-valued cells in column " + _columnName); + writer.key("columnName"); writer.value(_columnName); + writer.key("keyColumnName"); writer.value(_keyColumnName); + writer.key("separator"); writer.value(_separator); + writer.key("mode"); writer.value(_mode); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Split multi-valued cells in column " + _columnName; + } + + @Override + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + if (column == null) { + throw new Exception("No column named " + _columnName); + } + int cellIndex = column.getCellIndex(); + + Column keyColumn = project.columnModel.getColumnByName(_keyColumnName); + if (keyColumn == null) { + throw new Exception("No key column named " + _keyColumnName); + } + int keyCellIndex = keyColumn.getCellIndex(); + + List newRows = new ArrayList(); + + int oldRowCount = project.rows.size(); + for (int r = 0; r < oldRowCount; r++) { + Row oldRow = project.rows.get(r); + if (oldRow.isCellBlank(cellIndex)) { + newRows.add(oldRow.dup()); + continue; + } + + Object value = oldRow.getCellValue(cellIndex); + String s = value instanceof String ? 
((String) value) : value.toString(); + String[] values = null; + if (_mode.equals("regex")) { + values = s.split(_separator); + } else { + values = StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator); + } + + if (values.length < 2) { + newRows.add(oldRow.dup()); + continue; + } + + // First value goes into the same row + { + Row firstNewRow = oldRow.dup(); + firstNewRow.setCell(cellIndex, new Cell(values[0].trim(), null)); + + newRows.add(firstNewRow); + } + + int r2 = r + 1; + for (int v = 1; v < values.length; v++) { + Cell newCell = new Cell(values[v].trim(), null); + + if (r2 < project.rows.size()) { + Row oldRow2 = project.rows.get(r2); + if (oldRow2.isCellBlank(cellIndex) && + oldRow2.isCellBlank(keyCellIndex)) { + + Row newRow = oldRow2.dup(); + newRow.setCell(cellIndex, newCell); + + newRows.add(newRow); + r2++; + + continue; + } + } + + Row newRow = new Row(cellIndex + 1); + newRow.setCell(cellIndex, newCell); + + newRows.add(newRow); + } + + r = r2 - 1; // r will be incremented by the for loop anyway + } + + return new HistoryEntry( + historyEntryID, + project, + getBriefDescription(null), + this, + new MassRowChange(newRows) + ); + } +} diff --git a/main/src/com/google/refine/operations/cell/TextTransformOperation.java b/main/src/com/google/refine/operations/cell/TextTransformOperation.java new file mode 100644 index 000000000..7f195212d --- /dev/null +++ b/main/src/com/google/refine/operations/cell/TextTransformOperation.java @@ -0,0 +1,194 @@ +package com.google.refine.operations.cell; + +import java.io.Serializable; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.WrappedCell; +import com.google.refine.model.AbstractOperation; 
+import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.CellChange; +import com.google.refine.operations.EngineDependentMassCellOperation; +import com.google.refine.operations.OnError; +import com.google.refine.operations.OperationRegistry; + +public class TextTransformOperation extends EngineDependentMassCellOperation { + final protected String _expression; + final protected OnError _onError; + final protected boolean _repeat; + final protected int _repeatCount; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new TextTransformOperation( + engineConfig, + obj.getString("columnName"), + obj.getString("expression"), + stringToOnError(obj.getString("onError")), + obj.getBoolean("repeat"), + obj.getInt("repeatCount") + ); + } + + static public OnError stringToOnError(String s) { + if ("set-to-blank".equalsIgnoreCase(s)) { + return OnError.SetToBlank; + } else if ("store-error".equalsIgnoreCase(s)) { + return OnError.StoreError; + } else { + return OnError.KeepOriginal; + } + } + static public String onErrorToString(OnError onError) { + if (onError == OnError.SetToBlank) { + return "set-to-blank"; + } else if (onError == OnError.StoreError) { + return "store-error"; + } else { + return "keep-original"; + } + } + + public TextTransformOperation( + JSONObject engineConfig, + String columnName, + String expression, + OnError onError, + boolean repeat, + int repeatCount + ) { + super(engineConfig, columnName, true); + _expression = expression; + _onError = onError; + _repeat = repeat; + _repeatCount = repeatCount; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + 
writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnName"); writer.value(_columnName); + writer.key("expression"); writer.value(_expression); + writer.key("onError"); writer.value(onErrorToString(_onError)); + writer.key("repeat"); writer.value(_repeat); + writer.key("repeatCount"); writer.value(_repeatCount); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Text transform on cells in column " + _columnName + " using expression " + _expression; + } + + protected String createDescription(Column column, + List cellChanges) { + + return "Text transform on " + cellChanges.size() + + " cells in column " + column.getName() + ": " + _expression; + } + + protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + + Evaluable eval = MetaParser.parse(_expression); + Properties bindings = ExpressionUtils.createBindings(project); + + return new RowVisitor() { + int cellIndex; + Properties bindings; + List cellChanges; + Evaluable eval; + + public RowVisitor init(int cellIndex, Properties bindings, List cellChanges, Evaluable eval) { + this.cellIndex = cellIndex; + this.bindings = bindings; + this.cellChanges = cellChanges; + this.eval = eval; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(cellIndex); + Cell newCell = null; + + Object oldValue = cell != null ? 
cell.value : null; + + ExpressionUtils.bind(bindings, row, rowIndex, _columnName, cell); + + Object o = eval.evaluate(bindings); + if (o == null) { + if (oldValue != null) { + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, null); + cellChanges.add(cellChange); + } + } else { + if (o instanceof Cell) { + newCell = (Cell) o; + } else if (o instanceof WrappedCell) { + newCell = ((WrappedCell) o).cell; + } else { + Serializable newValue = ExpressionUtils.wrapStorable(o); + if (ExpressionUtils.isError(newValue)) { + if (_onError == OnError.KeepOriginal) { + return false; + } else if (_onError == OnError.SetToBlank) { + newValue = null; + } + } + + if (!ExpressionUtils.sameValue(oldValue, newValue)) { + newCell = new Cell(newValue, (cell != null) ? cell.recon : null); + + if (_repeat) { + for (int i = 0; i < _repeatCount; i++) { + ExpressionUtils.bind(bindings, row, rowIndex, _columnName, newCell); + + newValue = ExpressionUtils.wrapStorable(eval.evaluate(bindings)); + if (ExpressionUtils.isError(newValue)) { + break; + } else if (ExpressionUtils.sameValue(newCell.value, newValue)) { + break; + } + + newCell = new Cell(newValue, newCell.recon); + } + } + } + } + + if (newCell != null) { + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); + cellChanges.add(cellChange); + } + } + + return false; + } + }.init(column.getCellIndex(), bindings, cellChanges, eval); + } +} diff --git a/main/src/com/google/refine/operations/cell/TransposeColumnsIntoRowsOperation.java b/main/src/com/google/refine/operations/cell/TransposeColumnsIntoRowsOperation.java new file mode 100644 index 000000000..faf1770d5 --- /dev/null +++ b/main/src/com/google/refine/operations/cell/TransposeColumnsIntoRowsOperation.java @@ -0,0 +1,177 @@ +package com.google.refine.operations.cell; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + 
+import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.MassRowColumnChange; +import com.google.refine.operations.OperationRegistry; + +public class TransposeColumnsIntoRowsOperation extends AbstractOperation { + final protected String _startColumnName; + final protected int _columnCount; + final protected String _combinedColumnName; + final protected boolean _prependColumnName; + final protected String _separator; + final protected boolean _ignoreBlankCells; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + return new TransposeColumnsIntoRowsOperation( + obj.getString("startColumnName"), + obj.getInt("columnCount"), + obj.getString("combinedColumnName"), + obj.getBoolean("prependColumnName"), + obj.getString("separator"), + obj.getBoolean("ignoreBlankCells") + ); + } + + public TransposeColumnsIntoRowsOperation( + String startColumnName, + int columnCount, + String combinedColumnName, + boolean prependColumnName, + String separator, + boolean ignoreBlankCells + ) { + _startColumnName = startColumnName; + _columnCount = columnCount; + _combinedColumnName = combinedColumnName; + _prependColumnName = prependColumnName; + _separator = separator; + _ignoreBlankCells = ignoreBlankCells; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value("Transpose cells in " + _columnCount + " column(s) starting with " + _startColumnName + " into rows"); + writer.key("startColumnName"); writer.value(_startColumnName); + writer.key("columnCount"); writer.value(_columnCount); + writer.key("combinedColumnName"); 
writer.value(_combinedColumnName); + writer.key("prependColumnName"); writer.value(_prependColumnName); + writer.key("separator"); writer.value(_separator); + writer.key("ignoreBlankCells"); writer.value(_ignoreBlankCells); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Transpose cells in " + _columnCount + " column(s) starting with " + _startColumnName + " into rows"; + } + + @Override + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + if (_combinedColumnName != null && + !_combinedColumnName.isEmpty() && + project.columnModel.getColumnByName(_combinedColumnName) != null) { + throw new Exception("Another column already named " + _combinedColumnName); + } + + List newColumns = new ArrayList(); + List oldColumns = project.columnModel.columns; + + int columnsLeftToTranspose = _columnCount; + int startColumnIndex = oldColumns.size(); + for (int c = 0; c < oldColumns.size(); c++) { + Column column = oldColumns.get(c); + if (columnsLeftToTranspose == 0) { + // This column is beyond the columns to transpose + + Column newColumn = new Column(newColumns.size(), column.getOriginalHeaderLabel()); + newColumn.setName(column.getName()); + + newColumns.add(newColumn); + } else if (columnsLeftToTranspose < _columnCount) { + // This column is a column to transpose, but not the first + // nothing to do + + columnsLeftToTranspose--; + } else if (_startColumnName.equals(column.getName())) { + // This is the first column to transpose + + startColumnIndex = c; + + String columnName = _combinedColumnName != null && _combinedColumnName.length() > 0 ? 
_combinedColumnName : column.getName(); + Column newColumn = new Column(newColumns.size(), columnName); + + newColumns.add(newColumn); + + columnsLeftToTranspose--; + } else { + // This column is before all columns to transpose + + Column newColumn = new Column(newColumns.size(), column.getOriginalHeaderLabel()); + newColumn.setName(column.getName()); + + newColumns.add(newColumn); + } + } + + + List oldRows = project.rows; + List newRows = new ArrayList(oldRows.size() * _columnCount); + for (int r = 0; r < oldRows.size(); r++) { + Row oldRow = project.rows.get(r); + Row firstNewRow = new Row(newColumns.size()); + + newRows.add(firstNewRow); + + int transposedCells = 0; + for (int c = 0; c < oldColumns.size(); c++) { + Column column = oldColumns.get(c); + Cell cell = oldRow.getCell(column.getCellIndex()); + + if (c < startColumnIndex) { + firstNewRow.setCell(c, cell); + } else if (c == startColumnIndex || c < startColumnIndex + _columnCount) { + Cell newCell; + + if (cell == null || cell.value == null) { + if (_prependColumnName && !_ignoreBlankCells) { + newCell = new Cell(column.getName() + _separator, null); + } else { + continue; + } + } else if (_prependColumnName) { + newCell = new Cell(column.getName() + _separator + cell.value, null); + } else { + newCell = cell; + } + + if (transposedCells == 0) { + firstNewRow.setCell(startColumnIndex, newCell); + } else { + Row newRow = new Row(newColumns.size()); + + newRow.setCell(startColumnIndex, newCell); + newRows.add(newRow); + } + + transposedCells++; + } else { + firstNewRow.setCell(c - _columnCount + 1, cell); + } + } + } + + return new HistoryEntry( + historyEntryID, + project, + getBriefDescription(null), + this, + new MassRowColumnChange(newColumns, newRows) + ); + } +} diff --git a/main/src/com/google/refine/operations/cell/TransposeRowsIntoColumnsOperation.java b/main/src/com/google/refine/operations/cell/TransposeRowsIntoColumnsOperation.java new file mode 100644 index 000000000..b43dd6b5b --- /dev/null 
+++ b/main/src/com/google/refine/operations/cell/TransposeRowsIntoColumnsOperation.java @@ -0,0 +1,125 @@ +package com.google.refine.operations.cell; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.MassRowColumnChange; +import com.google.refine.operations.OperationRegistry; + +public class TransposeRowsIntoColumnsOperation extends AbstractOperation { + final protected String _columnName; + final protected int _rowCount; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + return new TransposeRowsIntoColumnsOperation( + obj.getString("columnName"), + obj.getInt("rowCount") + ); + } + + public TransposeRowsIntoColumnsOperation( + String columnName, + int rowCount + ) { + _columnName = columnName; + _rowCount = rowCount; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value("Transpose every " + _rowCount + " cells in column " + _columnName + " into separate columns"); + writer.key("columnName"); writer.value(_columnName); + writer.key("rowCount"); writer.value(_rowCount); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Transpose every " + _rowCount + " cells in column " + _columnName + " into separate columns"; + } + + @Override + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + List newColumns = new ArrayList(); + 
List oldColumns = project.columnModel.columns; + + int columnIndex = project.columnModel.getColumnIndexByName(_columnName); + int columnCount = oldColumns.size(); + + newColumns.addAll(oldColumns.subList(0, columnIndex)); + + for (int i = 0; i < columnCount; i++) { + Column column = oldColumns.get(i); + + if (i == columnIndex) { + int newIndex = 1; + for (int n = 0; n < _rowCount; n++) { + String columnName = _columnName + " " + newIndex++; + while (project.columnModel.getColumnByName(columnName) != null) { + columnName = _columnName + " " + newIndex++; + } + + newColumns.add(new Column(i + n, columnName)); + } + } else if (i < columnIndex) { + newColumns.add(new Column(i, column.getName())); + } else { + newColumns.add(new Column(i + _rowCount - 1, column.getName())); + } + } + + List oldRows = project.rows; + List newRows = new ArrayList(oldRows.size() / _rowCount); + for (int r = 0; r < oldRows.size(); r += _rowCount) { + Row firstNewRow = new Row(newColumns.size()); + + for (int r2 = 0; r2 < _rowCount && r + r2 < oldRows.size(); r2++) { + Row oldRow = oldRows.get(r + r2); + Row newRow = r2 == 0 ? 
firstNewRow : new Row(newColumns.size()); + boolean hasData = r2 == 0; + + for (int c = 0; c < oldColumns.size(); c++) { + Column column = oldColumns.get(c); + Cell cell = oldRow.getCell(column.getCellIndex()); + + if (cell != null && cell.value != null) { + if (c == columnIndex) { + firstNewRow.setCell(columnIndex + r2, cell); + } else if (c < columnIndex) { + newRow.setCell(c, cell); + hasData = true; + } else { + newRow.setCell(c + _rowCount - 1, cell); + hasData = true; + } + } + } + + if (hasData) { + newRows.add(newRow); + } + } + } + + return new HistoryEntry( + historyEntryID, + project, + getBriefDescription(null), + this, + new MassRowColumnChange(newColumns, newRows) + ); + } +} diff --git a/main/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperation.java b/main/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperation.java new file mode 100644 index 000000000..7e259ecba --- /dev/null +++ b/main/src/com/google/refine/operations/column/ColumnAdditionByFetchingURLsOperation.java @@ -0,0 +1,291 @@ +package com.google.refine.operations.column; + +import java.io.InputStream; +import java.io.Serializable; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.EvalError; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.WrappedCell; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import 
com.google.refine.model.Row; +import com.google.refine.model.changes.CellAtRow; +import com.google.refine.model.changes.ColumnAdditionChange; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.OnError; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.operations.cell.TextTransformOperation; +import com.google.refine.process.LongRunningProcess; +import com.google.refine.process.Process; +import com.google.refine.util.ParsingUtilities; + +public class ColumnAdditionByFetchingURLsOperation extends EngineDependentOperation { + final protected String _baseColumnName; + final protected String _urlExpression; + final protected OnError _onError; + + final protected String _newColumnName; + final protected int _columnInsertIndex; + final protected int _delay; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new ColumnAdditionByFetchingURLsOperation( + engineConfig, + obj.getString("baseColumnName"), + obj.getString("urlExpression"), + TextTransformOperation.stringToOnError(obj.getString("onError")), + obj.getString("newColumnName"), + obj.getInt("columnInsertIndex"), + obj.getInt("delay") + ); + } + + public ColumnAdditionByFetchingURLsOperation( + JSONObject engineConfig, + String baseColumnName, + String urlExpression, + OnError onError, + String newColumnName, + int columnInsertIndex, + int delay + ) { + super(engineConfig); + + _baseColumnName = baseColumnName; + _urlExpression = urlExpression; + _onError = onError; + + _newColumnName = newColumnName; + _columnInsertIndex = columnInsertIndex; + + _delay = delay; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); 
writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("newColumnName"); writer.value(_newColumnName); + writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); + writer.key("baseColumnName"); writer.value(_baseColumnName); + writer.key("urlExpression"); writer.value(_urlExpression); + writer.key("onError"); writer.value(TextTransformOperation.onErrorToString(_onError)); + writer.key("delay"); writer.value(_delay); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Create column " + _newColumnName + + " at index " + _columnInsertIndex + + " by fetching URLs based on column " + _baseColumnName + + " using expression " + _urlExpression; + } + + protected String createDescription(Column column, List cellsAtRows) { + return "Create new column " + _newColumnName + + ", filling " + cellsAtRows.size() + + " rows by fetching URLs based on column " + column.getName() + + " and formulated as " + _urlExpression; + } + + + public Process createProcess(Project project, Properties options) throws Exception { + Column column = project.columnModel.getColumnByName(_baseColumnName); + if (column == null) { + throw new Exception("No column named " + _baseColumnName); + } + if (project.columnModel.getColumnByName(_newColumnName) != null) { + throw new Exception("Another column already named " + _newColumnName); + } + + Engine engine = createEngine(project); + engine.initializeFromJSON(_engineConfig); + + Evaluable eval = MetaParser.parse(_urlExpression); + + return new ColumnAdditionByFetchingURLsProcess( + project, + engine, + eval, + getBriefDescription(null) + ); + } + + public class ColumnAdditionByFetchingURLsProcess extends LongRunningProcess implements Runnable { + final protected Project _project; + final protected Engine _engine; + final protected Evaluable _eval; + final protected long _historyEntryID; + protected int _cellIndex; + + public 
ColumnAdditionByFetchingURLsProcess( + Project project, + Engine engine, + Evaluable eval, + String description + ) throws JSONException { + super(description); + _project = project; + _engine = engine; + _eval = eval; + _historyEntryID = HistoryEntry.allocateID(); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(hashCode()); + writer.key("description"); writer.value(_description); + writer.key("immediate"); writer.value(false); + writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? "running" : "done")); + writer.key("progress"); writer.value(_progress); + writer.endObject(); + } + + protected Runnable getRunnable() { + return this; + } + + public void run() { + List urls = new ArrayList(_project.rows.size()); + + FilteredRows filteredRows = _engine.getAllFilteredRows(); + filteredRows.accept(_project, createRowVisitor(urls)); + + List responseBodies = new ArrayList(urls.size()); + for (int i = 0; i < urls.size(); i++) { + CellAtRow urlData = urls.get(i); + CellAtRow cellAtRow = fetch(urlData); + if (cellAtRow != null) { + responseBodies.add(cellAtRow); + } + + _progress = i * 100 / urls.size(); + try { + Thread.sleep(_delay); + } catch (InterruptedException e) { + if (_canceled) { + break; + } + } + } + + if (!_canceled) { + + HistoryEntry historyEntry = new HistoryEntry( + _historyEntryID, + _project, + _description, + ColumnAdditionByFetchingURLsOperation.this, + new ColumnAdditionChange( + _newColumnName, + _columnInsertIndex, + responseBodies) + ); + + _project.history.addEntry(historyEntry); + _project.processManager.onDoneProcess(this); + } + } + + CellAtRow fetch(CellAtRow urlData) { + String urlString = urlData.cell.value.toString(); + URL url = null; + + try { + url = new URL(urlString); + } catch (MalformedURLException e) { + return null; + } + + try { + InputStream is = url.openStream(); + try { + return new 
CellAtRow(urlData.row, new Cell(ParsingUtilities.inputStreamToString(is), null)); + } finally { + is.close(); + } + } catch (Exception e) { + return _onError == OnError.StoreError ? + new CellAtRow(urlData.row, new Cell(new EvalError(e.getMessage()), null)) : null; + } + } + + RowVisitor createRowVisitor(List cellsAtRows) { + return new RowVisitor() { + int cellIndex; + Properties bindings; + List cellsAtRows; + + public RowVisitor init(List cellsAtRows) { + Column column = _project.columnModel.getColumnByName(_baseColumnName); + + this.cellIndex = column.getCellIndex(); + this.bindings = ExpressionUtils.createBindings(_project); + this.cellsAtRows = cellsAtRows; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(cellIndex); + Cell newCell = null; + + ExpressionUtils.bind(bindings, row, rowIndex, _baseColumnName, cell); + + Object o = _eval.evaluate(bindings); + if (o != null) { + if (o instanceof Cell) { + newCell = (Cell) o; + } else if (o instanceof WrappedCell) { + newCell = ((WrappedCell) o).cell; + } else { + Serializable v = ExpressionUtils.wrapStorable(o); + if (ExpressionUtils.isNonBlankData(v)) { + newCell = new Cell(v.toString(), null); + } + } + } + + if (newCell != null) { + cellsAtRows.add(new CellAtRow(rowIndex, newCell)); + } + + return false; + } + }.init(cellsAtRows); + } + } +} diff --git a/main/src/com/google/refine/operations/column/ColumnAdditionOperation.java b/main/src/com/google/refine/operations/column/ColumnAdditionOperation.java new file mode 100644 index 000000000..d2433ba96 --- /dev/null +++ b/main/src/com/google/refine/operations/column/ColumnAdditionOperation.java @@ -0,0 +1,191 @@ +package com.google.refine.operations.column; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import 
java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.Evaluable; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.WrappedCell; +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.CellAtRow; +import com.google.refine.model.changes.ColumnAdditionChange; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.OnError; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.operations.cell.TextTransformOperation; + +public class ColumnAdditionOperation extends EngineDependentOperation { + final protected String _baseColumnName; + final protected String _expression; + final protected OnError _onError; + + final protected String _newColumnName; + final protected int _columnInsertIndex; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new ColumnAdditionOperation( + engineConfig, + obj.getString("baseColumnName"), + obj.getString("expression"), + TextTransformOperation.stringToOnError(obj.getString("onError")), + obj.getString("newColumnName"), + obj.getInt("columnInsertIndex") + ); + } + + public ColumnAdditionOperation( + JSONObject engineConfig, + String baseColumnName, + String expression, + OnError onError, + String newColumnName, + int columnInsertIndex + ) { + super(engineConfig); + + 
_baseColumnName = baseColumnName; + _expression = expression; + _onError = onError; + + _newColumnName = newColumnName; + _columnInsertIndex = columnInsertIndex; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("newColumnName"); writer.value(_newColumnName); + writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); + writer.key("baseColumnName"); writer.value(_baseColumnName); + writer.key("expression"); writer.value(_expression); + writer.key("onError"); writer.value(TextTransformOperation.onErrorToString(_onError)); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Create column " + _newColumnName + + " at index " + _columnInsertIndex + + " based on column " + _baseColumnName + + " using expression " + _expression; + } + + protected String createDescription(Column column, List cellsAtRows) { + return "Create new column " + _newColumnName + + " based on column " + column.getName() + + " by filling " + cellsAtRows.size() + + " rows with " + _expression; + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Engine engine = createEngine(project); + + Column column = project.columnModel.getColumnByName(_baseColumnName); + if (column == null) { + throw new Exception("No column named " + _baseColumnName); + } + if (project.columnModel.getColumnByName(_newColumnName) != null) { + throw new Exception("Another column already named " + _newColumnName); + } + + List cellsAtRows = new ArrayList(project.rows.size()); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(project, createRowVisitor(project, cellsAtRows)); + + String description = 
createDescription(column, cellsAtRows); + + Change change = new ColumnAdditionChange(_newColumnName, _columnInsertIndex, cellsAtRows); + + return new HistoryEntry( + historyEntryID, project, description, this, change); + } + + protected RowVisitor createRowVisitor(Project project, List cellsAtRows) throws Exception { + Column column = project.columnModel.getColumnByName(_baseColumnName); + + Evaluable eval = MetaParser.parse(_expression); + Properties bindings = ExpressionUtils.createBindings(project); + + return new RowVisitor() { + int cellIndex; + Properties bindings; + List cellsAtRows; + Evaluable eval; + + public RowVisitor init(int cellIndex, Properties bindings, List cellsAtRows, Evaluable eval) { + this.cellIndex = cellIndex; + this.bindings = bindings; + this.cellsAtRows = cellsAtRows; + this.eval = eval; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(cellIndex); + Cell newCell = null; + + ExpressionUtils.bind(bindings, row, rowIndex, _baseColumnName, cell); + + Object o = eval.evaluate(bindings); + if (o != null) { + if (o instanceof Cell) { + newCell = (Cell) o; + } else if (o instanceof WrappedCell) { + newCell = ((WrappedCell) o).cell; + } else { + Serializable v = ExpressionUtils.wrapStorable(o); + if (ExpressionUtils.isError(v)) { + if (_onError == OnError.SetToBlank) { + return false; + } else if (_onError == OnError.KeepOriginal) { + v = cell != null ? 
cell.value : null; + } + } + + if (v != null) { + newCell = new Cell(v, null); + } + } + } + + if (newCell != null) { + cellsAtRows.add(new CellAtRow(rowIndex, newCell)); + } + + return false; + } + }.init(column.getCellIndex(), bindings, cellsAtRows, eval); + } +} diff --git a/main/src/com/google/refine/operations/column/ColumnMoveOperation.java b/main/src/com/google/refine/operations/column/ColumnMoveOperation.java new file mode 100644 index 000000000..c44c70e29 --- /dev/null +++ b/main/src/com/google/refine/operations/column/ColumnMoveOperation.java @@ -0,0 +1,60 @@ +package com.google.refine.operations.column; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.model.changes.ColumnMoveChange; +import com.google.refine.operations.OperationRegistry; + +public class ColumnMoveOperation extends AbstractOperation { + final protected String _columnName; + final protected int _index; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + return new ColumnMoveOperation( + obj.getString("columnName"), + obj.getInt("index") + ); + } + + public ColumnMoveOperation( + String columnName, + int index + ) { + _columnName = columnName; + _index = index; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value("Move column " + _columnName + " to position " + _index); + writer.key("columnName"); writer.value(_columnName); + writer.key("index"); writer.value(_index); + writer.endObject(); + } + + + protected String getBriefDescription(Project project) { + return "Move column 
" + _columnName + " to position " + _index; + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + if (project.columnModel.getColumnByName(_columnName) == null) { + throw new Exception("No column named " + _columnName); + } + + Change change = new ColumnMoveChange(_columnName, _index); + + return new HistoryEntry(historyEntryID, project, getBriefDescription(null), ColumnMoveOperation.this, change); + } +} diff --git a/main/src/com/google/refine/operations/column/ColumnRemovalOperation.java b/main/src/com/google/refine/operations/column/ColumnRemovalOperation.java new file mode 100644 index 000000000..35f9bdaa0 --- /dev/null +++ b/main/src/com/google/refine/operations/column/ColumnRemovalOperation.java @@ -0,0 +1,59 @@ +package com.google.refine.operations.column; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.changes.ColumnRemovalChange; +import com.google.refine.operations.OperationRegistry; + +public class ColumnRemovalOperation extends AbstractOperation { + final protected String _columnName; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + return new ColumnRemovalOperation( + obj.getString("columnName") + ); + } + + public ColumnRemovalOperation( + String columnName + ) { + _columnName = columnName; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value("Remove column " + _columnName); + writer.key("columnName"); writer.value(_columnName); + 
writer.endObject(); + } + + + protected String getBriefDescription(Project project) { + return "Remove column " + _columnName; + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + if (column == null) { + throw new Exception("No column named " + _columnName); + } + + String description = "Remove column " + column.getName(); + + Change change = new ColumnRemovalChange(project.columnModel.columns.indexOf(column)); + + return new HistoryEntry(historyEntryID, project, description, ColumnRemovalOperation.this, change); + } +} diff --git a/main/src/com/google/refine/operations/column/ColumnRenameOperation.java b/main/src/com/google/refine/operations/column/ColumnRenameOperation.java new file mode 100644 index 000000000..d549b5b1d --- /dev/null +++ b/main/src/com/google/refine/operations/column/ColumnRenameOperation.java @@ -0,0 +1,63 @@ +package com.google.refine.operations.column; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.model.changes.ColumnRenameChange; +import com.google.refine.operations.OperationRegistry; + +public class ColumnRenameOperation extends AbstractOperation { + final protected String _oldColumnName; + final protected String _newColumnName; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + return new ColumnRenameOperation( + obj.getString("oldColumnName"), + obj.getString("newColumnName") + ); + } + + public ColumnRenameOperation( + String oldColumnName, + String newColumnName + ) { + _oldColumnName = oldColumnName; + _newColumnName = newColumnName; + } + + public void write(JSONWriter writer, 
Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value("Rename column " + _oldColumnName + " to " + _newColumnName); + writer.key("oldColumnName"); writer.value(_oldColumnName); + writer.key("newColumnName"); writer.value(_newColumnName); + writer.endObject(); + } + + + protected String getBriefDescription(Project project) { + return "Rename column " + _oldColumnName + " to " + _newColumnName; + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + if (project.columnModel.getColumnByName(_oldColumnName) == null) { + throw new Exception("No column named " + _oldColumnName); + } + if (project.columnModel.getColumnByName(_newColumnName) != null) { + throw new Exception("Another column already named " + _newColumnName); + } + + Change change = new ColumnRenameChange(_oldColumnName, _newColumnName); + + return new HistoryEntry(historyEntryID, project, getBriefDescription(null), ColumnRenameOperation.this, change); + } +} diff --git a/main/src/com/google/refine/operations/column/ColumnReorderOperation.java b/main/src/com/google/refine/operations/column/ColumnReorderOperation.java new file mode 100644 index 000000000..e14e45be0 --- /dev/null +++ b/main/src/com/google/refine/operations/column/ColumnReorderOperation.java @@ -0,0 +1,60 @@ +package com.google.refine.operations.column; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.model.changes.ColumnReorderChange; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.util.JSONUtilities; + +public class 
ColumnReorderOperation extends AbstractOperation { + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + List columnNames = new ArrayList(); + + JSONUtilities.getStringList(obj, "columnNames", columnNames); + + return new ColumnReorderOperation(columnNames); + } + + final protected List _columnNames; + + public ColumnReorderOperation(List columnNames) { + _columnNames = columnNames; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("columnNames"); writer.array(); + for (String n : _columnNames) { + writer.value(n); + } + writer.endArray(); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Reorder columns"; + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + return new HistoryEntry( + historyEntryID, + project, + "Reorder columns", + this, + new ColumnReorderChange(_columnNames) + ); + } +} diff --git a/main/src/com/google/refine/operations/column/ColumnSplitOperation.java b/main/src/com/google/refine/operations/column/ColumnSplitOperation.java new file mode 100644 index 000000000..380d6a6f7 --- /dev/null +++ b/main/src/com/google/refine/operations/column/ColumnSplitOperation.java @@ -0,0 +1,289 @@ +package com.google.refine.operations.column; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +import java.util.regex.Pattern; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import 
com.google.refine.expr.ExpressionUtils; +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import com.google.refine.importers.ImporterUtilities; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.ColumnSplitChange; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.util.JSONUtilities; + +public class ColumnSplitOperation extends EngineDependentOperation { + final protected String _columnName; + final protected boolean _guessCellType; + final protected boolean _removeOriginalColumn; + final protected String _mode; + + final protected String _separator; + final protected boolean _regex; + final protected int _maxColumns; + + final protected int[] _fieldLengths; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + String mode = obj.getString("mode"); + + if ("separator".equals(mode)) { + return new ColumnSplitOperation( + engineConfig, + obj.getString("columnName"), + obj.getBoolean("guessCellType"), + obj.getBoolean("removeOriginalColumn"), + obj.getString("separator"), + obj.getBoolean("regex"), + obj.getInt("maxColumns") + ); + } else { + return new ColumnSplitOperation( + engineConfig, + obj.getString("columnName"), + obj.getBoolean("guessCellType"), + obj.getBoolean("removeOriginalColumn"), + JSONUtilities.getIntArray(obj, "fieldLengths") + ); + } + } + + public ColumnSplitOperation( + JSONObject engineConfig, + String columnName, + boolean guessCellType, + boolean removeOriginalColumn, + String separator, + boolean regex, + int maxColumns + ) { + super(engineConfig); + + _columnName = columnName; + _guessCellType = guessCellType; + _removeOriginalColumn = 
removeOriginalColumn; + + _mode = "separator"; + _separator = separator; + _regex = regex; + _maxColumns = maxColumns; + + _fieldLengths = null; + } + + public ColumnSplitOperation( + JSONObject engineConfig, + String columnName, + boolean guessCellType, + boolean removeOriginalColumn, + int[] fieldLengths + ) { + super(engineConfig); + + _columnName = columnName; + _guessCellType = guessCellType; + _removeOriginalColumn = removeOriginalColumn; + + _mode = "lengths"; + _separator = null; + _regex = false; + _maxColumns = -1; + + _fieldLengths = fieldLengths; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnName"); writer.value(_columnName); + writer.key("guessCellType"); writer.value(_guessCellType); + writer.key("removeOriginalColumn"); writer.value(_removeOriginalColumn); + writer.key("mode"); writer.value(_mode); + if ("separator".equals(_mode)) { + writer.key("separator"); writer.value(_separator); + writer.key("regex"); writer.value(_regex); + writer.key("maxColumns"); writer.value(_maxColumns); + } else { + writer.key("fieldLengths"); writer.array(); + for (int l : _fieldLengths) { + writer.value(l); + } + writer.endArray(); + } + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Split column " + _columnName + + ("separator".equals(_mode) ? 
" by separator" : " by field lengths"); + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Engine engine = createEngine(project); + + Column column = project.columnModel.getColumnByName(_columnName); + if (column == null) { + throw new Exception("No column named " + _columnName); + } + + List columnNames = new ArrayList(); + List rowIndices = new ArrayList(project.rows.size()); + List> tuples = new ArrayList>(project.rows.size()); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + RowVisitor rowVisitor; + if ("lengths".equals(_mode)) { + rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) { + protected java.util.List split(String s) { + List results = new ArrayList(_fieldLengths.length + 1); + + int lastIndex = 0; + for (int i = 0; i < _fieldLengths.length; i++) { + int from = lastIndex; + int length = _fieldLengths[i]; + int to = Math.min(from + length, s.length()); + + results.add(stringToValue(s.substring(from, to))); + + lastIndex = to; + } + + return results; + }; + }; + } else if (_regex) { + Pattern pattern = Pattern.compile(_separator); + + rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) { + Pattern _pattern; + + protected java.util.List split(String s) { + return stringArrayToValueList(_pattern.split(s, _maxColumns)); + }; + + public RowVisitor init(Pattern pattern) { + _pattern = pattern; + return this; + } + }.init(pattern); + } else { + rowVisitor = new ColumnSplitRowVisitor(column.getCellIndex(), columnNames, rowIndices, tuples) { + protected java.util.List split(String s) { + return stringArrayToValueList( + StringUtils.splitByWholeSeparatorPreserveAllTokens(s, _separator, _maxColumns)); + }; + }; + } + + filteredRows.accept(project, rowVisitor); + + String description = + "Split " + rowIndices.size() + + " cell(s) in column " + _columnName + + " into several columns" + + 
("separator".equals(_mode) ? " by separator" : " by field lengths"); + + Change change = new ColumnSplitChange( + _columnName, + columnNames, + rowIndices, + tuples, + _removeOriginalColumn + ); + + return new HistoryEntry( + historyEntryID, project, description, this, change); + } + + protected class ColumnSplitRowVisitor implements RowVisitor { + + int cellIndex; + List columnNames; + List rowIndices; + List> tuples; + + int columnNameIndex = 1; + + ColumnSplitRowVisitor( + int cellIndex, + List columnNames, + List rowIndices, + List> tuples + ) { + this.cellIndex = cellIndex; + this.columnNames = columnNames; + this.rowIndices = rowIndices; + this.tuples = tuples; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Object value = row.getCellValue(cellIndex); + if (ExpressionUtils.isNonBlankData(value)) { + String s = value instanceof String ? ((String) value) : value.toString(); + + List tuple = split(s); + + rowIndices.add(rowIndex); + tuples.add(tuple); + + for (int i = columnNames.size(); i < tuple.size(); i++) { + while (true) { + String newColumnName = _columnName + " " + columnNameIndex++; + if (project.columnModel.getColumnByName(newColumnName) == null) { + columnNames.add(newColumnName); + break; + } + } + } + } + return false; + } + + protected List split(String s) { + throw new UnsupportedOperationException(); + } + + protected Serializable stringToValue(String s) { + return _guessCellType ? 
ImporterUtilities.parseCellValue(s) : s; + } + + protected List stringArrayToValueList(String[] cells) { + List results = new ArrayList(cells.length); + for (String cell : cells) { + results.add(stringToValue(cell)); + } + + return results; + } + } +} diff --git a/main/src/com/google/refine/operations/column/ExtendDataOperation.java b/main/src/com/google/refine/operations/column/ExtendDataOperation.java new file mode 100644 index 000000000..ed98ef1f4 --- /dev/null +++ b/main/src/com/google/refine/operations/column/ExtendDataOperation.java @@ -0,0 +1,275 @@ +package com.google.refine.operations.column; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.changes.CellAtRow; +import com.google.refine.model.changes.DataExtensionChange; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.process.LongRunningProcess; +import com.google.refine.process.Process; +import com.google.refine.protograph.FreebaseType; +import com.google.refine.util.FreebaseDataExtensionJob; +import com.google.refine.util.FreebaseDataExtensionJob.ColumnInfo; +import com.google.refine.util.FreebaseDataExtensionJob.DataExtension; + +public class ExtendDataOperation extends 
EngineDependentOperation { + final protected String _baseColumnName; + final protected JSONObject _extension; + final protected int _columnInsertIndex; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new ExtendDataOperation( + engineConfig, + obj.getString("baseColumnName"), + obj.getJSONObject("extension"), + obj.getInt("columnInsertIndex") + ); + } + + public ExtendDataOperation( + JSONObject engineConfig, + String baseColumnName, + JSONObject extension, + int columnInsertIndex + ) { + super(engineConfig); + + _baseColumnName = baseColumnName; + _extension = extension; + _columnInsertIndex = columnInsertIndex; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnInsertIndex"); writer.value(_columnInsertIndex); + writer.key("baseColumnName"); writer.value(_baseColumnName); + writer.key("extension"); writer.value(_extension); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Extend data at index " + _columnInsertIndex + + " based on column " + _baseColumnName; + } + + protected String createDescription(Column column, List cellsAtRows) { + return "Extend data at index " + _columnInsertIndex + + " based on column " + column.getName() + + " by filling " + cellsAtRows.size(); + } + + public Process createProcess(Project project, Properties options) throws Exception { + return new ExtendDataProcess( + project, + getEngineConfig(), + getBriefDescription(null) + ); + } + + public class ExtendDataProcess extends LongRunningProcess implements Runnable { + final protected Project _project; + final protected 
JSONObject _engineConfig; + final protected long _historyEntryID; + protected int _cellIndex; + protected FreebaseDataExtensionJob _job; + + public ExtendDataProcess( + Project project, + JSONObject engineConfig, + String description + ) throws JSONException { + super(description); + _project = project; + _engineConfig = engineConfig; + _historyEntryID = HistoryEntry.allocateID(); + + _job = new FreebaseDataExtensionJob(_extension); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(hashCode()); + writer.key("description"); writer.value(_description); + writer.key("immediate"); writer.value(false); + writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? "running" : "done")); + writer.key("progress"); writer.value(_progress); + writer.endObject(); + } + + protected Runnable getRunnable() { + return this; + } + + protected void populateRowsWithMatches(List rowIndices) throws Exception { + Engine engine = new Engine(_project); + engine.initializeFromJSON(_engineConfig); + + Column column = _project.columnModel.getColumnByName(_baseColumnName); + if (column == null) { + throw new Exception("No column named " + _baseColumnName); + } + + _cellIndex = column.getCellIndex(); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(_project, new RowVisitor() { + List _rowIndices; + + public RowVisitor init(List rowIndices) { + _rowIndices = rowIndices; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(_cellIndex); + if (cell != null && cell.recon != null && cell.recon.match != null) { + _rowIndices.add(rowIndex); + } + + return false; + } + }.init(rowIndices)); + } + + protected int extendRows( + List rowIndices, + 
List dataExtensions, + int from, + int limit, + Map reconCandidateMap + ) { + Set ids = new HashSet(); + + int end; + for (end = from; end < limit && ids.size() < 10; end++) { + int index = rowIndices.get(end); + Row row = _project.rows.get(index); + Cell cell = row.getCell(_cellIndex); + + ids.add(cell.recon.match.id); + } + + Map map = null; + try { + map = _job.extend(ids, reconCandidateMap); + } catch (Exception e) { + map = new HashMap(); + } + + for (int i = from; i < end; i++) { + int index = rowIndices.get(i); + Row row = _project.rows.get(index); + Cell cell = row.getCell(_cellIndex); + String guid = cell.recon.match.id; + + if (map.containsKey(guid)) { + dataExtensions.add(map.get(guid)); + } else { + dataExtensions.add(null); + } + } + + return end; + } + + public void run() { + List rowIndices = new ArrayList(); + List dataExtensions = new ArrayList(); + + try { + populateRowsWithMatches(rowIndices); + } catch (Exception e2) { + // TODO : Not sure what to do here? + e2.printStackTrace(); + } + + int start = 0; + Map reconCandidateMap = new HashMap(); + + while (start < rowIndices.size()) { + int end = extendRows(rowIndices, dataExtensions, start, rowIndices.size(), reconCandidateMap); + start = end; + + _progress = end * 100 / rowIndices.size(); + try { + Thread.sleep(200); + } catch (InterruptedException e) { + if (_canceled) { + break; + } + } + } + + if (!_canceled) { + List columnNames = new ArrayList(); + for (ColumnInfo info : _job.columns) { + columnNames.add(StringUtils.join(info.names, " - ")); + } + + List columnTypes = new ArrayList(); + for (ColumnInfo info : _job.columns) { + columnTypes.add(info.expectedType); + } + + HistoryEntry historyEntry = new HistoryEntry( + _historyEntryID, + _project, + _description, + ExtendDataOperation.this, + new DataExtensionChange( + _baseColumnName, + _columnInsertIndex, + columnNames, + columnTypes, + rowIndices, + dataExtensions, + _historyEntryID) + ); + + _project.history.addEntry(historyEntry); + 
_project.processManager.onDoneProcess(this); + } + } + } +} diff --git a/main/src/com/google/refine/operations/recon/ImportQADataOperation.java b/main/src/com/google/refine/operations/recon/ImportQADataOperation.java new file mode 100644 index 000000000..9cf1b8dd8 --- /dev/null +++ b/main/src/com/google/refine/operations/recon/ImportQADataOperation.java @@ -0,0 +1,106 @@ +package com.google.refine.operations.recon; + +import java.io.InputStreamReader; +import java.io.LineNumberReader; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.commands.freebase.UploadDataCommand; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Row; +import com.google.refine.model.changes.MassReconChange; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.util.ParsingUtilities; + +public class ImportQADataOperation extends AbstractOperation { + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + return new ImportQADataOperation(); + } + + public ImportQADataOperation() { + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.endObject(); + } + + @Override + protected String getBriefDescription(Project project) { + return "Import QA DAta"; + } + + @Override + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Integer jobID = (Integer) 
project.getMetadata().getPreferenceStore().get(UploadDataCommand.s_dataLoadJobIDPref); + if (jobID == null) { + throw new InternalError("Project is not associated with any data loading job."); + } + + Map reconIDToResult = new HashMap(); + + URL url = new URL("http://gridworks-loads.dfhuynh.user.dev.freebaseapps.com/get_answers/" + jobID); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setReadTimeout(30000); // 30 seconds + + LineNumberReader reader = new LineNumberReader(new InputStreamReader(conn.getInputStream())); + try { + String line; + while ((line = reader.readLine()) != null) { + JSONObject obj = ParsingUtilities.evaluateJsonStringToObject(line); + long reconID = Long.parseLong(obj.getString("recon_id").substring(3)); + + reconIDToResult.put(reconID, obj.getString("result")); + } + } finally { + reader.close(); + } + + Map oldRecons = new HashMap(); + Map newRecons = new HashMap(); + + for (int r = 0; r < project.rows.size(); r++) { + Row row = project.rows.get(r); + + for (int c = 0; c < row.cells.size(); c++) { + Cell cell = row.cells.get(c); + if (cell != null && cell.recon != null) { + Recon oldRecon = cell.recon; + + if (reconIDToResult.containsKey(oldRecon.id)) { + Recon newRecon = oldRecon.dup(); + newRecon.setFeature(Recon.Feature_qaResult, reconIDToResult.get(oldRecon.id)); + + reconIDToResult.remove(oldRecon.id); + + oldRecons.put(oldRecon.id, oldRecon); + newRecons.put(oldRecon.id, newRecon); + } + } + } + } + + return new HistoryEntry( + historyEntryID, + project, + getBriefDescription(project), + this, + new MassReconChange(newRecons, oldRecons) + ); + } +} diff --git a/main/src/com/google/refine/operations/recon/ReconDiscardJudgmentsOperation.java b/main/src/com/google/refine/operations/recon/ReconDiscardJudgmentsOperation.java new file mode 100644 index 000000000..bbe6f9afa --- /dev/null +++ b/main/src/com/google/refine/operations/recon/ReconDiscardJudgmentsOperation.java @@ -0,0 +1,125 @@ +package 
com.google.refine.operations.recon; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.RowVisitor; +import com.google.refine.history.Change; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Row; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.model.changes.CellChange; +import com.google.refine.model.changes.ReconChange; +import com.google.refine.operations.EngineDependentMassCellOperation; +import com.google.refine.operations.OperationRegistry; + +public class ReconDiscardJudgmentsOperation extends EngineDependentMassCellOperation { + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + String columnName = obj.getString("columnName"); + + return new ReconDiscardJudgmentsOperation( + engineConfig, + columnName + ); + } + + public ReconDiscardJudgmentsOperation(JSONObject engineConfig, String columnName) { + super(engineConfig, columnName, false); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnName"); writer.value(_columnName); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Discard recon judgments for cells in column " + _columnName; + } + + protected String createDescription(Column column, + List cellChanges) { + + 
return "Discard recon judgments for " + cellChanges.size() + + " cells in column " + column.getName(); + } + + protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + + return new RowVisitor() { + int cellIndex; + List cellChanges; + Map dupReconMap = new HashMap(); + long historyEntryID; + + public RowVisitor init(int cellIndex, List cellChanges, long historyEntryID) { + this.cellIndex = cellIndex; + this.cellChanges = cellChanges; + this.historyEntryID = historyEntryID; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(cellIndex); + if (cell != null && cell.recon != null) { + Recon newRecon; + if (dupReconMap.containsKey(cell.recon.id)) { + newRecon = dupReconMap.get(cell.recon.id); + newRecon.judgmentBatchSize++; + } else { + newRecon = cell.recon.dup(historyEntryID); + newRecon.match = null; + newRecon.matchRank = -1; + newRecon.judgment = Judgment.None; + newRecon.judgmentAction = "mass"; + newRecon.judgmentBatchSize = 1; + + dupReconMap.put(cell.recon.id, newRecon); + } + + Cell newCell = new Cell(cell.value, newRecon); + + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); + cellChanges.add(cellChange); + } + return false; + } + }.init(column.getCellIndex(), cellChanges, historyEntryID); + } + + protected Change createChange(Project project, Column column, List cellChanges) { + return new ReconChange( + cellChanges, + _columnName, + column.getReconConfig(), + null + ); + } +} diff --git a/main/src/com/google/refine/operations/recon/ReconJudgeSimilarCellsOperation.java b/main/src/com/google/refine/operations/recon/ReconJudgeSimilarCellsOperation.java new file mode 100644 index 000000000..8dfd98316 --- 
/dev/null +++ b/main/src/com/google/refine/operations/recon/ReconJudgeSimilarCellsOperation.java @@ -0,0 +1,248 @@ +package com.google.refine.operations.recon; + + import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.history.Change; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.model.changes.CellChange; +import com.google.refine.model.changes.ReconChange; +import com.google.refine.operations.EngineDependentMassCellOperation; +import com.google.refine.operations.OperationRegistry; + +public class ReconJudgeSimilarCellsOperation extends EngineDependentMassCellOperation { + final protected String _similarValue; + final protected Judgment _judgment; + final protected ReconCandidate _match; + final protected boolean _shareNewTopics; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + ReconCandidate match = null; + if (obj.has("match")) { + JSONObject matchObj = obj.getJSONObject("match"); + + JSONArray types = matchObj.getJSONArray("types"); + String[] typeIDs = new String[types.length()]; + for (int i = 0; i < typeIDs.length; i++) { + typeIDs[i] = types.getString(i); + } + + match = new ReconCandidate( + matchObj.getString("id"), + matchObj.getString("name"), + typeIDs, + matchObj.getDouble("score") + ); + } + + Judgment judgment = Judgment.None; + if 
(obj.has("judgment")) { + judgment = Recon.stringToJudgment(obj.getString("judgment")); + } + + return new ReconJudgeSimilarCellsOperation( + engineConfig, + obj.getString("columnName"), + obj.getString("similarValue"), + judgment, + match, + obj.has("shareNewTopics") ? obj.getBoolean("shareNewTopics") : false + ); + } + + public ReconJudgeSimilarCellsOperation( + JSONObject engineConfig, + String columnName, + String similarValue, + Judgment judgment, + ReconCandidate match, + boolean shareNewTopics + ) { + super(engineConfig, columnName, false); + this._similarValue = similarValue; + this._judgment = judgment; + this._match = match; + this._shareNewTopics = shareNewTopics; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnName"); writer.value(_columnName); + writer.key("similarValue"); writer.value(_similarValue); + writer.key("judgment"); writer.value(Recon.judgmentToString(_judgment)); + if (_match != null) { + writer.key("match"); _match.write(writer, options); + } + writer.key("shareNewTopics"); writer.value(_shareNewTopics); + + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + if (_judgment == Judgment.None) { + return "Discard recon judgments for cells containing \"" + + _similarValue + "\" in column " + _columnName; + } else if (_judgment == Judgment.New) { + if (_shareNewTopics) { + return "Mark to create one single new topic for all cells containing \"" + + _similarValue + "\" in column " + _columnName; + } else { + return "Mark to create one new topic for each cell containing \"" + + _similarValue + "\" in column " + _columnName; + } + } else if (_judgment == Judgment.Matched) { + return "Match topic " + + _match.name 
+ " (" + + _match.id + ") for cells containing \"" + + _similarValue + "\" in column " + _columnName; + } + throw new InternalError("Can't get here"); + } + + protected String createDescription(Column column, + List cellChanges) { + + if (_judgment == Judgment.None) { + return "Discard recon judgments for " + cellChanges.size() + " cells containing \"" + + _similarValue + "\" in column " + _columnName; + } else if (_judgment == Judgment.New) { + if (_shareNewTopics) { + return "Mark to create one single new topic for " + cellChanges.size() + " cells containing \"" + + _similarValue + "\" in column " + _columnName; + } else { + return "Mark to create one new topic for each of " + cellChanges.size() + " cells containing \"" + + _similarValue + "\" in column " + _columnName; + } + } else if (_judgment == Judgment.Matched) { + return "Match topic " + + _match.name + " (" + + _match.id + ") for " + + cellChanges.size() + " cells containing \"" + + _similarValue + "\" in column " + _columnName; + } + throw new InternalError("Can't get here"); + } + + protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + + return new RowVisitor() { + int _cellIndex; + List _cellChanges; + Recon _sharedNewRecon = null; + Map _dupReconMap = new HashMap(); + long _historyEntryID; + + public RowVisitor init(int cellIndex, List cellChanges, long historyEntryID) { + _cellIndex = cellIndex; + _cellChanges = cellChanges; + _historyEntryID = historyEntryID; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(_cellIndex); + if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) { + String value = cell.value instanceof String ? 
+ ((String) cell.value) : cell.value.toString(); + + if (_similarValue.equals(value)) { + Recon recon = null; + if (_judgment == Judgment.New && _shareNewTopics) { + if (_sharedNewRecon == null) { + _sharedNewRecon = new Recon(_historyEntryID, null, null); + _sharedNewRecon.judgment = Judgment.New; + _sharedNewRecon.judgmentBatchSize = 0; + _sharedNewRecon.judgmentAction = "similar"; + } + _sharedNewRecon.judgmentBatchSize++; + + recon = _sharedNewRecon; + } else { + // A cell that was never reconciled has no recon to read an id from or to dup; + // group such cells under key 0 and start them from an empty Recon. + long reconID = cell.recon != null ? cell.recon.id : 0; + + if (_dupReconMap.containsKey(reconID)) { + recon = _dupReconMap.get(reconID); + recon.judgmentBatchSize++; + } else { + recon = cell.recon != null ? + cell.recon.dup(_historyEntryID) : + new Recon(_historyEntryID, null, null); + recon.judgmentBatchSize = 1; + recon.matchRank = -1; + recon.judgmentAction = "similar"; + + if (_judgment == Judgment.Matched) { + recon.judgment = Recon.Judgment.Matched; + recon.match = _match; + + // record the position of the chosen match among the existing candidates, if any + if (recon.candidates != null) { + for (int m = 0; m < recon.candidates.size(); m++) { + if (recon.candidates.get(m).id.equals(_match.id)) { + recon.matchRank = m; + break; + } + } + } + } else if (_judgment == Judgment.New) { + recon.judgment = Recon.Judgment.New; + recon.match = null; + } else if (_judgment == Judgment.None) { + recon.judgment = Recon.Judgment.None; + recon.match = null; + } + + _dupReconMap.put(reconID, recon); + } + } + + Cell newCell = new Cell(cell.value, recon); + + CellChange cellChange = new CellChange(rowIndex, _cellIndex, cell, newCell); + _cellChanges.add(cellChange); + } + } + return false; + } + }.init(column.getCellIndex(), cellChanges, historyEntryID); + } + + + protected Change createChange(Project project, Column column, List cellChanges) { + return new ReconChange( + cellChanges, + _columnName, + column.getReconConfig(), + null + ); + } +} diff --git a/main/src/com/google/refine/operations/recon/ReconMarkNewTopicsOperation.java b/main/src/com/google/refine/operations/recon/ReconMarkNewTopicsOperation.java new file mode 100644 index 000000000..c0054d505 --- /dev/null +++ 
b/main/src/com/google/refine/operations/recon/ReconMarkNewTopicsOperation.java @@ -0,0 +1,143 @@ +package com.google.refine.operations.recon; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.RowVisitor; +import com.google.refine.history.Change; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Row; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.model.changes.CellChange; +import com.google.refine.model.changes.ReconChange; +import com.google.refine.operations.EngineDependentMassCellOperation; +import com.google.refine.operations.OperationRegistry; + +public class ReconMarkNewTopicsOperation extends EngineDependentMassCellOperation { + final protected boolean _shareNewTopics; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new ReconMarkNewTopicsOperation( + engineConfig, + obj.getString("columnName"), + obj.has("shareNewTopics") ? 
obj.getBoolean("shareNewTopics") : false + ); + } + + public ReconMarkNewTopicsOperation(JSONObject engineConfig, String columnName, boolean shareNewTopics) { + super(engineConfig, columnName, false); + _shareNewTopics = shareNewTopics; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnName"); writer.value(_columnName); + writer.key("shareNewTopics"); writer.value(_shareNewTopics); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Mark to create new topics for cells in column " + _columnName + + (_shareNewTopics ? + ", one topic for each group of similar cells" : + ", one topic for each cell"); + } + + protected String createDescription(Column column, + List cellChanges) { + + return "Mark to create new topics for " + cellChanges.size() + + " cells in column " + column.getName() + + (_shareNewTopics ? 
+ ", one topic for each group of similar cells" : + ", one topic for each cell"); + } + + protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + + return new RowVisitor() { + int cellIndex; + List cellChanges; + Map sharedRecons = new HashMap(); + long historyEntryID; + + public RowVisitor init(int cellIndex, List cellChanges, long historyEntryID) { + this.cellIndex = cellIndex; + this.cellChanges = cellChanges; + this.historyEntryID = historyEntryID; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(cellIndex); + if (cell != null) { + Recon recon = null; + if (_shareNewTopics) { + String s = cell.value == null ? "" : cell.value.toString(); + if (sharedRecons.containsKey(s)) { + recon = sharedRecons.get(s); + recon.judgmentBatchSize++; + } else { + recon = new Recon(historyEntryID, null, null); + recon.judgment = Judgment.New; + recon.judgmentBatchSize = 1; + recon.judgmentAction = "mass"; + + sharedRecons.put(s, recon); + } + } else { + recon = cell.recon == null ? 
new Recon(historyEntryID, null, null) : cell.recon.dup(historyEntryID); + recon.match = null; + recon.matchRank = -1; + recon.judgment = Judgment.New; + recon.judgmentBatchSize = 1; + recon.judgmentAction = "mass"; + } + + Cell newCell = new Cell(cell.value, recon); + + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); + cellChanges.add(cellChange); + } + return false; + } + }.init(column.getCellIndex(), cellChanges, historyEntryID); + } + + protected Change createChange(Project project, Column column, List cellChanges) { + return new ReconChange( + cellChanges, + _columnName, + column.getReconConfig(), + null + ); + } +} diff --git a/main/src/com/google/refine/operations/recon/ReconMatchBestCandidatesOperation.java b/main/src/com/google/refine/operations/recon/ReconMatchBestCandidatesOperation.java new file mode 100644 index 000000000..c7eec8a1c --- /dev/null +++ b/main/src/com/google/refine/operations/recon/ReconMatchBestCandidatesOperation.java @@ -0,0 +1,133 @@ +package com.google.refine.operations.recon; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.RowVisitor; +import com.google.refine.history.Change; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.model.changes.CellChange; +import com.google.refine.model.changes.ReconChange; +import com.google.refine.operations.EngineDependentMassCellOperation; +import com.google.refine.operations.OperationRegistry; + +public class ReconMatchBestCandidatesOperation extends 
EngineDependentMassCellOperation { + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + String columnName = obj.getString("columnName"); + + return new ReconMatchBestCandidatesOperation( + engineConfig, + columnName + ); + } + + public ReconMatchBestCandidatesOperation(JSONObject engineConfig, String columnName) { + super(engineConfig, columnName, false); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnName"); writer.value(_columnName); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Match each cell to its best recon candidate in column " + _columnName; + } + + protected String createDescription(Column column, + List cellChanges) { + + return "Match each of " + cellChanges.size() + + " cells to its best candidate in column " + column.getName(); + } + + protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + + return new RowVisitor() { + int cellIndex; + List cellChanges; + Map dupReconMap = new HashMap(); + long historyEntryID; + + public RowVisitor init(int cellIndex, List cellChanges, long historyEntryID) { + this.cellIndex = cellIndex; + this.cellChanges = cellChanges; + this.historyEntryID = historyEntryID; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + if (cellIndex < row.cells.size()) { + 
Cell cell = row.cells.get(cellIndex); + if (cell != null && cell.recon != null) { + ReconCandidate candidate = cell.recon.getBestCandidate(); + if (candidate != null) { + Recon newRecon; + if (dupReconMap.containsKey(cell.recon.id)) { + newRecon = dupReconMap.get(cell.recon.id); + newRecon.judgmentBatchSize++; + } else { + newRecon = cell.recon.dup(historyEntryID); + newRecon.judgmentBatchSize = 1; + newRecon.match = candidate; + newRecon.matchRank = 0; + newRecon.judgment = Judgment.Matched; + newRecon.judgmentAction = "mass"; + + dupReconMap.put(cell.recon.id, newRecon); + } + Cell newCell = new Cell( + cell.value, + newRecon + ); + + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); + cellChanges.add(cellChange); + } + } + } + return false; + } + }.init(column.getCellIndex(), cellChanges, historyEntryID); + } + + protected Change createChange(Project project, Column column, List cellChanges) { + return new ReconChange( + cellChanges, + _columnName, + column.getReconConfig(), + null + ); + } +} diff --git a/main/src/com/google/refine/operations/recon/ReconMatchSpecificTopicOperation.java b/main/src/com/google/refine/operations/recon/ReconMatchSpecificTopicOperation.java new file mode 100644 index 000000000..053303054 --- /dev/null +++ b/main/src/com/google/refine/operations/recon/ReconMatchSpecificTopicOperation.java @@ -0,0 +1,182 @@ +package com.google.refine.operations.recon; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.RowVisitor; +import com.google.refine.history.Change; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import 
com.google.refine.model.ReconCandidate; +import com.google.refine.model.Row; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.model.changes.CellChange; +import com.google.refine.model.changes.ReconChange; +import com.google.refine.operations.EngineDependentMassCellOperation; +import com.google.refine.operations.OperationRegistry; + +public class ReconMatchSpecificTopicOperation extends EngineDependentMassCellOperation { + final protected ReconCandidate match; + final protected String identifierSpace; + final protected String schemaSpace; + + /** + * Rebuilds the operation from JSON of the shape produced by write(): the candidate's + * id, name and types are nested under "match". + */ + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + JSONObject match = obj.getJSONObject("match"); + + // write() emits "types" inside "match"; fall back to a top-level "types" array, + // which is the only place this method looked before. + JSONArray types = match.optJSONArray("types"); + if (types == null) { + types = obj.getJSONArray("types"); + } + String[] typeIDs = new String[types.length()]; + for (int i = 0; i < typeIDs.length; i++) { + typeIDs[i] = types.getString(i); + } + + return new ReconMatchSpecificTopicOperation( + engineConfig, + obj.getString("columnName"), + new ReconCandidate( + match.getString("id"), + match.getString("name"), + typeIDs, + 100 + ), + obj.getString("identifierSpace"), + obj.getString("schemaSpace") + ); + } + + public ReconMatchSpecificTopicOperation( + JSONObject engineConfig, + String columnName, + ReconCandidate match, + String identifierSpace, + String schemaSpace + ) { + super(engineConfig, columnName, false); + this.match = match; + this.identifierSpace = identifierSpace; + this.schemaSpace = schemaSpace; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("columnName"); writer.value(_columnName); + writer.key("match"); + writer.object(); + writer.key("id"); 
writer.value(match.id); + writer.key("name"); writer.value(match.name); + writer.key("types"); + writer.array(); + for (String typeID : match.types) { + writer.value(typeID); + } + writer.endArray(); + writer.endObject(); + writer.key("identifierSpace"); writer.value(identifierSpace); + writer.key("schemaSpace"); writer.value(schemaSpace); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Match specific topic " + + match.name + " (" + + match.id + ") to cells in column " + _columnName; + } + + protected String createDescription(Column column, + List cellChanges) { + return "Match specific topic " + + match.name + " (" + + match.id + ") to " + cellChanges.size() + + " cells in column " + column.getName(); + } + + protected RowVisitor createRowVisitor(Project project, List cellChanges, long historyEntryID) throws Exception { + Column column = project.columnModel.getColumnByName(_columnName); + + return new RowVisitor() { + int cellIndex; + List cellChanges; + Map dupReconMap = new HashMap(); + long historyEntryID; + + public RowVisitor init(int cellIndex, List cellChanges, long historyEntryID) { + this.cellIndex = cellIndex; + this.cellChanges = cellChanges; + this.historyEntryID = historyEntryID; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + Cell cell = row.getCell(cellIndex); + if (cell != null) { + long reconID = cell.recon != null ? cell.recon.id : 0; + + Recon newRecon; + if (dupReconMap.containsKey(reconID)) { + newRecon = dupReconMap.get(reconID); + newRecon.judgmentBatchSize++; + } else { + newRecon = cell.recon != null ? 
+ cell.recon.dup(historyEntryID) : + new Recon( + historyEntryID, + identifierSpace, + schemaSpace); + + newRecon.match = match; + newRecon.matchRank = -1; + newRecon.judgment = Judgment.Matched; + newRecon.judgmentAction = "mass"; + newRecon.judgmentBatchSize = 1; + + dupReconMap.put(reconID, newRecon); + } + + Cell newCell = new Cell( + cell.value, + newRecon + ); + + CellChange cellChange = new CellChange(rowIndex, cellIndex, cell, newCell); + cellChanges.add(cellChange); + } + return false; + } + }.init(column.getCellIndex(), cellChanges, historyEntryID); + } + + protected Change createChange(Project project, Column column, List cellChanges) { + return new ReconChange( + cellChanges, + _columnName, + column.getReconConfig(), + null + ); + } +} diff --git a/main/src/com/google/refine/operations/recon/ReconOperation.java b/main/src/com/google/refine/operations/recon/ReconOperation.java new file mode 100644 index 000000000..98a68e168 --- /dev/null +++ b/main/src/com/google/refine/operations/recon/ReconOperation.java @@ -0,0 +1,298 @@ +package com.google.refine.operations.recon; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Row; +import com.google.refine.model.changes.CellChange; +import com.google.refine.model.changes.ReconChange; +import 
com.google.refine.model.recon.ReconConfig; +import com.google.refine.model.recon.ReconJob; +import com.google.refine.model.recon.StandardReconConfig; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.process.LongRunningProcess; +import com.google.refine.process.Process; + +public class ReconOperation extends EngineDependentOperation { + final protected String _columnName; + final protected ReconConfig _reconConfig; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new ReconOperation( + engineConfig, + obj.getString("columnName"), + ReconConfig.reconstruct(obj.getJSONObject("config")) + ); + } + + public ReconOperation( + JSONObject engineConfig, + String columnName, + ReconConfig reconConfig + ) { + super(engineConfig); + _columnName = columnName; + _reconConfig = reconConfig; + } + + public Process createProcess(Project project, Properties options) throws Exception { + return new ReconProcess( + project, + getEngineConfig(), + getBriefDescription(null) + ); + } + + protected String getBriefDescription(Project project) { + return _reconConfig.getBriefDescription(project, _columnName); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("columnName"); writer.value(_columnName); + writer.key("config"); _reconConfig.write(writer, options); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.endObject(); + } + + static protected class ReconEntry { + final public int rowIndex; + final public Cell cell; + + public ReconEntry(int rowIndex, Cell cell) { + this.rowIndex = rowIndex; + this.cell = cell; + } + } + 
static protected class JobGroup { + final public ReconJob job; + final public List entries = new ArrayList(); + + public JobGroup(ReconJob job) { + this.job = job; + } + } + + public class ReconProcess extends LongRunningProcess implements Runnable { + final protected Project _project; + final protected JSONObject _engineConfig; + final protected long _historyEntryID; + protected List _entries; + protected int _cellIndex; + + public ReconProcess( + Project project, + JSONObject engineConfig, + String description + ) { + super(description); + _project = project; + _engineConfig = engineConfig; + _historyEntryID = HistoryEntry.allocateID(); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(hashCode()); + writer.key("description"); writer.value(_description); + writer.key("immediate"); writer.value(false); + writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? "running" : "done")); + writer.key("progress"); writer.value(_progress); + writer.key("onDone"); + writer.array(); + writer.object(); + writer.key("action"); writer.value("createFacet"); + writer.key("facetType"); writer.value("list"); + writer.key("facetConfig"); + writer.object(); + writer.key("name"); writer.value(_columnName + ": judgment"); + writer.key("columnName"); writer.value(_columnName); + writer.key("expression"); writer.value("cell.recon.judgment"); + writer.key("omitError"); writer.value(true); + writer.endObject(); + writer.key("facetOptions"); + writer.object(); + writer.key("scroll"); writer.value(false); + writer.endObject(); + writer.endObject(); + + if (_reconConfig instanceof StandardReconConfig) { + writer.object(); + writer.key("action"); writer.value("createFacet"); + writer.key("facetType"); writer.value("range"); + writer.key("facetConfig"); + writer.object(); + writer.key("name"); writer.value(_columnName + ": best candidate's score"); + 
writer.key("columnName"); writer.value(_columnName); + writer.key("expression"); writer.value("cell.recon.best.score"); + writer.key("mode"); writer.value("range"); + writer.endObject(); + writer.endObject(); + } + writer.endArray(); + writer.endObject(); + } + + protected Runnable getRunnable() { + return this; + } + + protected void populateEntries() throws Exception { + Engine engine = new Engine(_project); + engine.initializeFromJSON(_engineConfig); + + Column column = _project.columnModel.getColumnByName(_columnName); + if (column == null) { + throw new Exception("No column named " + _columnName); + } + + _entries = new ArrayList(_project.rows.size()); + _cellIndex = column.getCellIndex(); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(_project, new RowVisitor() { + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + if (_cellIndex < row.cells.size()) { + Cell cell = row.cells.get(_cellIndex); + if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) { + _entries.add(new ReconEntry(rowIndex, cell)); + } + } + return false; + } + }); + } + + public void run() { + try { + populateEntries(); + } catch (Exception e2) { + // TODO : Not sure what to do here? 
+ e2.printStackTrace(); + // populateEntries() can fail before _entries is assigned, so there is nothing to reconcile + return; + } + + Map jobKeyToGroup = new HashMap(); + + for (ReconEntry entry : _entries) { + ReconJob job = _reconConfig.createJob( + _project, + entry.rowIndex, + _project.rows.get(entry.rowIndex), + _columnName, + entry.cell + ); + + int key = job.getKey(); + JobGroup group = jobKeyToGroup.get(key); + if (group == null) { + group = new JobGroup(job); + jobKeyToGroup.put(key, group); + } + group.entries.add(entry); + } + + List cellChanges = new ArrayList(_entries.size()); + List groups = new ArrayList(jobKeyToGroup.values()); + + int batchSize = _reconConfig.getBatchSize(); + for (int i = 0; i < groups.size(); i += batchSize) { + int to = Math.min(i + batchSize, groups.size()); + + List jobs = new ArrayList(to - i); + for (int j = i; j < to; j++) { + jobs.add(groups.get(j).job); + } + + List recons = _reconConfig.batchRecon(jobs, _historyEntryID); + for (int j = i; j < to; j++) { + // position of this group's job within the current batch; a short result list yields a null recon + int index = j - i; + Recon recon = index < recons.size() ? recons.get(index) : null; + List entries = groups.get(j).entries; + + if (recon != null) { + recon.judgmentBatchSize = entries.size(); + } + + for (ReconEntry entry : entries) { + Cell oldCell = entry.cell; + Cell newCell = new Cell(oldCell.value, recon); + + CellChange cellChange = new CellChange( + entry.rowIndex, + _cellIndex, + oldCell, + newCell + ); + cellChanges.add(cellChange); + } + } + + _progress = i * 100 / groups.size(); + try { + Thread.sleep(50); + } catch (InterruptedException e) { + if (_canceled) { + break; + } + } + } + + if (!_canceled) { + Change reconChange = new ReconChange( + cellChanges, + _columnName, + _reconConfig, + null + ); + + HistoryEntry historyEntry = new HistoryEntry( + _historyEntryID, + _project, + _description, + ReconOperation.this, + reconChange + ); + + _project.history.addEntry(historyEntry); + _project.processManager.onDoneProcess(this); + } + } + } +} diff --git a/main/src/com/google/refine/operations/row/DenormalizeOperation.java 
b/main/src/com/google/refine/operations/row/DenormalizeOperation.java new file mode 100644 index 000000000..4b9d2ad90 --- /dev/null +++ b/main/src/com/google/refine/operations/row/DenormalizeOperation.java @@ -0,0 +1,82 @@ +package com.google.refine.operations.row; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.RecordModel.CellDependency; +import com.google.refine.model.RecordModel.RowDependency; +import com.google.refine.model.changes.MassRowChange; +import com.google.refine.operations.OperationRegistry; + +public class DenormalizeOperation extends AbstractOperation { + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + return new DenormalizeOperation(); + } + + public DenormalizeOperation() { + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value("Denormalize"); + writer.endObject(); + } + + + protected String getBriefDescription(Project project) { + return "Denormalize"; + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + List newRows = new ArrayList(); + + List oldRows = project.rows; + for (int r = 0; r < oldRows.size(); r++) { + Row oldRow = oldRows.get(r); + Row newRow = null; + + RowDependency rd = project.recordModel.getRowDependency(r); + if (rd.cellDependencies != null) { + newRow = oldRow.dup(); + + for (int c = 0; c < rd.cellDependencies.length; c++) { + CellDependency cd = 
rd.cellDependencies[c]; + if (cd != null) { + int contextRowIndex = cd.rowIndex; + int contextCellIndex = cd.cellIndex; + + if (contextRowIndex >= 0 && contextRowIndex < oldRows.size()) { + Row contextRow = oldRows.get(contextRowIndex); + Cell contextCell = contextRow.getCell(contextCellIndex); + + newRow.setCell(contextCellIndex, contextCell); + } + } + } + } + + newRows.add(newRow != null ? newRow : oldRow); + } + + return new HistoryEntry( + historyEntryID, + project, + getBriefDescription(project), + DenormalizeOperation.this, + new MassRowChange(newRows) + ); + } +} diff --git a/main/src/com/google/refine/operations/row/RowFlagOperation.java b/main/src/com/google/refine/operations/row/RowFlagOperation.java new file mode 100644 index 000000000..0fa804e57 --- /dev/null +++ b/main/src/com/google/refine/operations/row/RowFlagOperation.java @@ -0,0 +1,103 @@ +package com.google.refine.operations.row; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.MassChange; +import com.google.refine.model.changes.RowFlagChange; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.OperationRegistry; + +public class RowFlagOperation extends EngineDependentOperation { + final protected boolean _flagged; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + boolean flagged = obj.getBoolean("flagged"); + + 
return new RowFlagOperation( + engineConfig, + flagged + ); + } + + public RowFlagOperation(JSONObject engineConfig, boolean flagged) { + super(engineConfig); + _flagged = flagged; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("flagged"); writer.value(_flagged); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return (_flagged ? "Flag rows" : "Unflag rows"); + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Engine engine = createEngine(project); + + List changes = new ArrayList(project.rows.size()); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(project, createRowVisitor(project, changes)); + + return new HistoryEntry( + historyEntryID, + project, + (_flagged ? 
"Flag" : "Unflag") + " " + changes.size() + " rows", + this, + new MassChange(changes, false) + ); + } + + protected RowVisitor createRowVisitor(Project project, List changes) throws Exception { + return new RowVisitor() { + List changes; + + public RowVisitor init(List changes) { + this.changes = changes; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + if (row.flagged != _flagged) { + RowFlagChange change = new RowFlagChange(rowIndex, _flagged); + + changes.add(change); + } + return false; + } + }.init(changes); + } +} diff --git a/main/src/com/google/refine/operations/row/RowRemovalOperation.java b/main/src/com/google/refine/operations/row/RowRemovalOperation.java new file mode 100644 index 000000000..959884aaa --- /dev/null +++ b/main/src/com/google/refine/operations/row/RowRemovalOperation.java @@ -0,0 +1,92 @@ +package com.google.refine.operations.row; + + import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.RowRemovalChange; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.OperationRegistry; + +public class RowRemovalOperation extends EngineDependentOperation { + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + + return new 
RowRemovalOperation( + engineConfig + ); + } + + public RowRemovalOperation(JSONObject engineConfig) { + super(engineConfig); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Remove rows"; + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Engine engine = createEngine(project); + + List rowIndices = new ArrayList(); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(project, createRowVisitor(project, rowIndices)); + + return new HistoryEntry( + historyEntryID, + project, + "Remove " + rowIndices.size() + " rows", + this, + new RowRemovalChange(rowIndices) + ); + } + + protected RowVisitor createRowVisitor(Project project, List rowIndices) throws Exception { + return new RowVisitor() { + List rowIndices; + + public RowVisitor init(List rowIndices) { + this.rowIndices = rowIndices; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + rowIndices.add(rowIndex); + + return false; + } + }.init(rowIndices); + } +} diff --git a/main/src/com/google/refine/operations/row/RowReorderOperation.java b/main/src/com/google/refine/operations/row/RowReorderOperation.java new file mode 100644 index 000000000..77fa1e770 --- /dev/null +++ b/main/src/com/google/refine/operations/row/RowReorderOperation.java @@ -0,0 +1,126 @@ +package com.google.refine.operations.row; + + import java.util.ArrayList; +import java.util.List; +import 
java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.RecordVisitor; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.browsing.Engine.Mode; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.model.Row; +import com.google.refine.model.changes.RowReorderChange; +import com.google.refine.operations.OperationRegistry; +import com.google.refine.sorting.SortingRecordVisitor; +import com.google.refine.sorting.SortingRowVisitor; + +public class RowReorderOperation extends AbstractOperation { + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + String mode = obj.getString("mode"); + JSONObject sorting = obj.has("sorting") && !obj.isNull("sorting") ? 
+ obj.getJSONObject("sorting") : null; + + return new RowReorderOperation(Engine.stringToMode(mode), sorting); + } + + final protected Mode _mode; + final protected JSONObject _sorting; + + public RowReorderOperation(Mode mode, JSONObject sorting) { + _mode = mode; + _sorting = sorting; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("mode"); writer.value(Engine.modeToString(_mode)); + writer.key("sorting"); writer.value(_sorting); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return "Reorder rows"; + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Engine engine = new Engine(project); + engine.setMode(_mode); + + List rowIndices = new ArrayList(); + if (_mode == Mode.RowBased) { + RowVisitor visitor = new IndexingVisitor(rowIndices); + if (_sorting != null) { + SortingRowVisitor srv = new SortingRowVisitor(visitor); + + srv.initializeFromJSON(project, _sorting); + if (srv.hasCriteria()) { + visitor = srv; + } + } + + engine.getAllRows().accept(project, visitor); + } else { + RecordVisitor visitor = new IndexingVisitor(rowIndices); + if (_sorting != null) { + SortingRecordVisitor srv = new SortingRecordVisitor(visitor); + + srv.initializeFromJSON(project, _sorting); + if (srv.hasCriteria()) { + visitor = srv; + } + } + + engine.getAllRecords().accept(project, visitor); + } + + return new HistoryEntry( + historyEntryID, + project, + "Reorder rows", + this, + new RowReorderChange(rowIndices) + ); + } + + static protected class IndexingVisitor implements RowVisitor, RecordVisitor { + List _indices; + + IndexingVisitor(List indices) { + _indices = indices; + } + + @Override + public void start(Project project) { + } + + @Override + public 
void end(Project project) { + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + _indices.add(rowIndex); + return false; + } + + @Override + public boolean visit(Project project, Record record) { + for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { + _indices.add(r); + } + return false; + } + } +} diff --git a/main/src/com/google/refine/operations/row/RowStarOperation.java b/main/src/com/google/refine/operations/row/RowStarOperation.java new file mode 100644 index 000000000..e69c871e0 --- /dev/null +++ b/main/src/com/google/refine/operations/row/RowStarOperation.java @@ -0,0 +1,103 @@ +package com.google.refine.operations.row; + + import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.browsing.Engine; +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.history.Change; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.AbstractOperation; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.model.changes.MassChange; +import com.google.refine.model.changes.RowStarChange; +import com.google.refine.operations.EngineDependentOperation; +import com.google.refine.operations.OperationRegistry; + +public class RowStarOperation extends EngineDependentOperation { + final protected boolean _starred; + + static public AbstractOperation reconstruct(Project project, JSONObject obj) throws Exception { + JSONObject engineConfig = obj.getJSONObject("engineConfig"); + boolean starred = obj.getBoolean("starred"); + + return new RowStarOperation( + engineConfig, + starred + ); + } + + public RowStarOperation(JSONObject engineConfig, boolean starred) { + super(engineConfig); + _starred = starred; + } + + public void write(JSONWriter writer, Properties 
options) + throws JSONException { + + writer.object(); + writer.key("op"); writer.value(OperationRegistry.s_opClassToName.get(this.getClass())); + writer.key("description"); writer.value(getBriefDescription(null)); + writer.key("engineConfig"); writer.value(getEngineConfig()); + writer.key("starred"); writer.value(_starred); + writer.endObject(); + } + + protected String getBriefDescription(Project project) { + return (_starred ? "Star rows" : "Unstar rows"); + } + + protected HistoryEntry createHistoryEntry(Project project, long historyEntryID) throws Exception { + Engine engine = createEngine(project); + + List changes = new ArrayList(project.rows.size()); + + FilteredRows filteredRows = engine.getAllFilteredRows(); + filteredRows.accept(project, createRowVisitor(project, changes)); + + return new HistoryEntry( + historyEntryID, + project, + (_starred ? "Star" : "Unstar") + " " + changes.size() + " rows", + this, + new MassChange(changes, false) + ); + } + + protected RowVisitor createRowVisitor(Project project, List changes) throws Exception { + return new RowVisitor() { + List changes; + + public RowVisitor init(List changes) { + this.changes = changes; + return this; + } + + @Override + public void start(Project project) { + // nothing to do + } + + @Override + public void end(Project project) { + // nothing to do + } + + public boolean visit(Project project, int rowIndex, Row row) { + if (row.starred != _starred) { + RowStarChange change = new RowStarChange(rowIndex, _starred); + + changes.add(change); + } + return false; + } + }.init(changes); + } +} diff --git a/main/src/com/google/refine/preference/PreferenceStore.java b/main/src/com/google/refine/preference/PreferenceStore.java new file mode 100644 index 000000000..988620ded --- /dev/null +++ b/main/src/com/google/refine/preference/PreferenceStore.java @@ -0,0 +1,90 @@ +package com.google.refine.preference; + +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Iterator; +import 
java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+import com.google.refine.GridworksServlet;
+import com.google.refine.Jsonizable;
+/** String-keyed preference map that serializes itself as a JSON "entries" object. */
+public class PreferenceStore implements Jsonizable {
+    protected Map<String, Object> _prefs = new HashMap<String, Object>();
+    /** Stores value under key; a null value removes the key instead. */
+    public void put(String key, Object value) {
+        if (value == null) {
+            _prefs.remove(key);
+        } else {
+            _prefs.put(key, value);
+        }
+    }
+    /** Returns the value stored under key, or null when there is none. */
+    public Object get(String key) {
+        return _prefs.get(key);
+    }
+    /** Returns the live key set of the underlying map (not a copy). */
+    public Set<String> getKeys() {
+        return _prefs.keySet();
+    }
+    /** Writes {"entries": {...}}; Jsonizable values write themselves, all others go through writer.value(). */
+    @Override
+    public void write(JSONWriter writer, Properties options) throws JSONException {
+        writer.object();
+
+        writer.key("entries");
+            writer.object();
+            for (String k : _prefs.keySet()) {
+                writer.key(k);
+
+                Object o = _prefs.get(k);
+                if (o instanceof Jsonizable) {
+                    ((Jsonizable) o).write(writer, options);
+                } else {
+                    writer.value(o);
+                }
+            }
+            writer.endObject();
+
+        writer.endObject();
+    }
+    /** Copies every non-null member of obj's "entries" object into this store, reviving each via loadObject. */
+    @SuppressWarnings("unchecked") // JSONObject.keys() is declared as a raw Iterator
+    public void load(JSONObject obj) throws JSONException {
+        if (obj.has("entries") && !obj.isNull("entries")) {
+            JSONObject entries = obj.getJSONObject("entries");
+
+            Iterator<String> i = entries.keys();
+            while (i.hasNext()) {
+                String key = i.next();
+                if (!entries.isNull(key)) {
+                    Object o = entries.get(key);
+                    _prefs.put(key, loadObject(o));
+                }
+            }
+        }
+    }
+    /** Revives a JSONObject through the static load(JSONObject) of the class named by its "class" key; any other value is returned as is. */
+    static public Object loadObject(Object o) {
+        if (o instanceof JSONObject) {
+            try {
+                JSONObject obj2 = (JSONObject) o;
+                String className = obj2.getString("class");
+                Class<?> klass = GridworksServlet.getClass(className);
+                Method method = klass.getMethod("load", JSONObject.class);
+
+                return method.invoke(null, obj2); // null receiver: "load" must be static
+            } catch (Exception e) {
+                e.printStackTrace();
+                return null; // best effort: an entry that cannot be revived becomes null
+            }
+        } else {
+            return o;
+        }
+    }
+}
diff --git a/main/src/com/google/refine/preference/TopList.java b/main/src/com/google/refine/preference/TopList.java
new file mode 100644
index 000000000..9e8c0542e
---
/dev/null
+++ b/main/src/com/google/refine/preference/TopList.java
@@ -0,0 +1,72 @@
+package com.google.refine.preference;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.collections.list.UnmodifiableList;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+import com.google.refine.Jsonizable;
+
+/** Most-recent-first list of distinct strings, capped at a fixed number of entries. */
+public class TopList implements Jsonizable {
+    private static final long serialVersionUID = 2666669643063493350L;
+
+    final protected int _top; // maximum number of entries kept
+    final protected List<String> _list = new ArrayList<String>();
+    /** Creates an empty list that keeps at most top entries. */
+    public TopList(int top) {
+        _top = top;
+    }
+    /** Returns a read-only view of the entries, most recently added first. */
+    @SuppressWarnings("unchecked") // UnmodifiableList.decorate returns a raw List
+    public List<String> getList() {
+        return (List<String>) UnmodifiableList.decorate(_list);
+    }
+    /** Moves element to the front (inserting it if new) and drops entries beyond the cap from the tail. */
+    public void add(String element) {
+        _list.remove(element);
+        _list.add(0, element);
+        while (_list.size() > _top) {
+            _list.remove(_list.size() - 1);
+        }
+    }
+    /** Writes {"class", "top", "list"}; "class" holds this class's name. */
+    @Override
+    public void write(JSONWriter writer, Properties options) throws JSONException {
+        writer.object();
+        writer.key("class"); writer.value(this.getClass().getName());
+
+        writer.key("top"); writer.value(_top);
+        writer.key("list");
+        writer.array();
+        for (String element : _list) {
+            writer.value(element);
+        }
+        writer.endArray();
+        writer.endObject();
+    }
+    /** Rebuilds a TopList from JSON; "top" falls back to 10 when absent or null. */
+    static public TopList load(JSONObject obj) throws JSONException {
+        int top = obj.has("top") && !obj.isNull("top") ?
obj.getInt("top") : 10; + TopList tl = new TopList(top); + + if (obj.has("list") && !obj.isNull("list")) { + JSONArray a = obj.getJSONArray("list"); + + tl.load(a); + } + return tl; + } + + public void load(JSONArray a) throws JSONException { + int length = a.length(); + for (int i = 0; i < length && _list.size() < _top; i++) { + _list.add(a.getString(i)); + } + } +} diff --git a/main/src/com/google/refine/process/LongRunningProcess.java b/main/src/com/google/refine/process/LongRunningProcess.java new file mode 100644 index 000000000..9cc3e0010 --- /dev/null +++ b/main/src/com/google/refine/process/LongRunningProcess.java @@ -0,0 +1,71 @@ +package com.google.refine.process; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.history.HistoryEntry; + +abstract public class LongRunningProcess extends Process { + final protected String _description; + protected ProcessManager _manager; + protected Thread _thread; + protected int _progress; // out of 100 + protected boolean _canceled; + + protected LongRunningProcess(String description) { + _description = description; + } + + public void cancel() { + _canceled = true; + if (_thread != null && _thread.isAlive()) { + _thread.interrupt(); + } + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(hashCode()); + writer.key("description"); writer.value(_description); + writer.key("immediate"); writer.value(false); + writer.key("status"); writer.value(_thread == null ? "pending" : (_thread.isAlive() ? 
"running" : "done")); + writer.key("progress"); writer.value(_progress); + writer.endObject(); + } + + @Override + public boolean isImmediate() { + return false; + } + + @Override + public boolean isRunning() { + return _thread != null && _thread.isAlive(); + } + + @Override + public boolean isDone() { + return _thread != null && !_thread.isAlive(); + } + + @Override + public HistoryEntry performImmediate() { + throw new RuntimeException("Not an immediate process"); + } + + @Override + public void startPerforming(ProcessManager manager) { + if (_thread == null) { + _manager = manager; + + _thread = new Thread(getRunnable()); + _thread.start(); + } + } + + abstract protected Runnable getRunnable(); +} diff --git a/main/src/com/google/refine/process/Process.java b/main/src/com/google/refine/process/Process.java new file mode 100644 index 000000000..161b24d02 --- /dev/null +++ b/main/src/com/google/refine/process/Process.java @@ -0,0 +1,16 @@ +package com.google.refine.process; + +import com.google.refine.Jsonizable; +import com.google.refine.history.HistoryEntry; + +public abstract class Process implements Jsonizable { + abstract public boolean isImmediate(); + + abstract public boolean isRunning(); + abstract public boolean isDone(); + + abstract public HistoryEntry performImmediate() throws Exception; + + abstract public void startPerforming(ProcessManager manager); + abstract public void cancel(); +} diff --git a/main/src/com/google/refine/process/ProcessManager.java b/main/src/com/google/refine/process/ProcessManager.java new file mode 100644 index 000000000..5c21c6431 --- /dev/null +++ b/main/src/com/google/refine/process/ProcessManager.java @@ -0,0 +1,95 @@ +package com.google.refine.process; + +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; +import com.google.refine.history.HistoryEntry; +import 
com.google.refine.history.HistoryProcess;
+/** Queue of processes: immediate ones run inline, long-running ones are started one at a time from the head. */
+public class ProcessManager implements Jsonizable {
+    protected List<Process> _processes = new LinkedList<Process>();
+
+    public ProcessManager() {
+
+    }
+    /** Writes {"processes": [...]} with every queued process in queue order. */
+    public void write(JSONWriter writer, Properties options)
+            throws JSONException {
+
+        writer.object();
+        writer.key("processes"); writer.array();
+        for (Process p : _processes) {
+            p.write(writer, options);
+        }
+        writer.endArray();
+
+        writer.endObject();
+    }
+    /** Runs process right away when it is immediate and nothing is queued, returning its history entry; otherwise queues it and returns null. */
+    public HistoryEntry queueProcess(Process process) throws Exception {
+        if (process.isImmediate() && _processes.size() == 0) {
+            return process.performImmediate();
+        } else {
+            _processes.add(process);
+
+            update();
+        }
+        return null;
+    }
+    /** HistoryProcess variant: returns true only when the process ran right away and produced a non-null entry. */
+    public boolean queueProcess(HistoryProcess process) throws Exception {
+        if (process.isImmediate() && _processes.size() == 0) {
+            return process.performImmediate() != null;
+        } else {
+            _processes.add(process);
+
+            update();
+        }
+        return false;
+    }
+    /** Returns true while at least one process is still queued. */
+    public boolean hasPending() {
+        return _processes.size() > 0;
+    }
+    /** Removes a finished process from the queue and advances to the next one. */
+    public void onDoneProcess(Process p) {
+        _processes.remove(p);
+        update();
+    }
+    /** Cancels every running long-running process, then empties the queue. */
+    public void cancelAll() {
+        for (Process p : _processes) {
+            if (!p.isImmediate() && p.isRunning()) {
+                p.cancel();
+            }
+        }
+        _processes.clear();
+    }
+    /** Works through the head of the queue: runs immediate processes, drops finished ones, starts the first idle long-running one and stops there. */
+    protected void update() {
+        while (_processes.size() > 0) {
+            Process p = _processes.get(0);
+            if (p.isImmediate()) {
+                try {
+                    p.performImmediate();
+                } catch (Exception e) {
+                    // TODO: Not sure what to do yet
+                    e.printStackTrace();
+                }
+                _processes.remove(0);
+            } else if (p.isDone()) {
+                _processes.remove(0);
+            } else {
+                if (!p.isRunning()) {
+                    p.startPerforming(this);
+                }
+                break;
+            }
+        }
+    }
+}
diff --git a/main/src/com/google/refine/process/QuickHistoryEntryProcess.java b/main/src/com/google/refine/process/QuickHistoryEntryProcess.java
new file mode 100644
index 000000000..994d6b9e3
--- /dev/null
+++ b/main/src/com/google/refine/process/QuickHistoryEntryProcess.java
@@ -0,0 +1,66 @@
+package com.google.refine.process;
+
+import
java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.Project; + +abstract public class QuickHistoryEntryProcess extends Process { + final protected Project _project; + final protected String _briefDescription; + protected HistoryEntry _historyEntry; + boolean _done = false; + + public QuickHistoryEntryProcess(Project project, String briefDescription) { + _project = project; + _briefDescription = briefDescription; + } + + public void cancel() { + throw new RuntimeException("Not a long-running process"); + } + + public boolean isImmediate() { + return true; + } + + public boolean isRunning() { + throw new RuntimeException("Not a long-running process"); + } + + public HistoryEntry performImmediate() throws Exception { + if (_historyEntry == null) { + _historyEntry = createHistoryEntry(HistoryEntry.allocateID()); + } + _project.history.addEntry(_historyEntry); + _done = true; + + return _historyEntry; + } + + public void startPerforming(ProcessManager manager) { + throw new RuntimeException("Not a long-running process"); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(hashCode()); + writer.key("description"); writer.value(_historyEntry != null ? _historyEntry.description : _briefDescription); + writer.key("immediate"); writer.value(true); + writer.key("status"); writer.value(_done ? 
"done" : "pending"); + writer.endObject(); + } + + + @Override + public boolean isDone() { + return _done; + } + + abstract protected HistoryEntry createHistoryEntry(long historyEntryID) throws Exception; +} diff --git a/main/src/com/google/refine/protograph/AnonymousNode.java b/main/src/com/google/refine/protograph/AnonymousNode.java new file mode 100644 index 000000000..d489c7eec --- /dev/null +++ b/main/src/com/google/refine/protograph/AnonymousNode.java @@ -0,0 +1,45 @@ +package com.google.refine.protograph; + +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +public class AnonymousNode implements Node, NodeWithLinks { + final public FreebaseType type; + final public List links = new LinkedList(); + + public AnonymousNode(FreebaseType type) { + this.type = type; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("nodeType"); writer.value("anonymous"); + writer.key("type"); type.write(writer, options); + if (links != null) { + writer.key("links"); writer.array(); + for (Link link : links) { + link.write(writer, options); + } + writer.endArray(); + } + writer.endObject(); + } + + public void addLink(Link link) { + links.add(link); + } + + public Link getLink(int index) { + return links.get(index); + } + + public int getLinkCount() { + return links.size(); + } +} diff --git a/main/src/com/google/refine/protograph/BooleanColumnCondition.java b/main/src/com/google/refine/protograph/BooleanColumnCondition.java new file mode 100644 index 000000000..a70c012cf --- /dev/null +++ b/main/src/com/google/refine/protograph/BooleanColumnCondition.java @@ -0,0 +1,42 @@ +package com.google.refine.protograph; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import 
com.google.refine.model.Row; + + +public class BooleanColumnCondition implements Condition { + final public String columnName; + + public BooleanColumnCondition(String columnName) { + this.columnName = columnName; + } + + @Override + public boolean test(Project project, int rowIndex, Row row) { + Column column = project.columnModel.getColumnByName(columnName); + if (column != null) { + Object o = row.getCellValue(column.getCellIndex()); + if (o != null) { + if (o instanceof Boolean) { + return ((Boolean) o).booleanValue(); + } else { + return Boolean.parseBoolean(o.toString()); + } + } + } + return false; + } + + @Override + public void write(JSONWriter writer, Properties options) throws JSONException { + writer.object(); + writer.key("columnName"); writer.value(columnName); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/protograph/CellKeyNode.java b/main/src/com/google/refine/protograph/CellKeyNode.java new file mode 100644 index 000000000..5ce182d0b --- /dev/null +++ b/main/src/com/google/refine/protograph/CellKeyNode.java @@ -0,0 +1,33 @@ +package com.google.refine.protograph; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +public class CellKeyNode extends CellNode { + final public FreebaseTopic namespace; + + public CellKeyNode( + FreebaseTopic namespace + ) { + this.namespace = namespace; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("nodeType"); writer.value("cell-as-key"); + + writer.key("columnNames"); + writer.array(); + for (String name : columnNames) { + writer.value(name); + } + writer.endArray(); + + writer.key("namespace"); namespace.write(writer, options); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/protograph/CellNode.java b/main/src/com/google/refine/protograph/CellNode.java new file mode 100644 index 000000000..1e42aed56 --- /dev/null +++ 
b/main/src/com/google/refine/protograph/CellNode.java @@ -0,0 +1,8 @@ +package com.google.refine.protograph; + +import java.util.LinkedList; +import java.util.List; + +abstract public class CellNode implements Node { + final public List columnNames = new LinkedList(); +} diff --git a/main/src/com/google/refine/protograph/CellTopicNode.java b/main/src/com/google/refine/protograph/CellTopicNode.java new file mode 100644 index 000000000..cb42ef8a4 --- /dev/null +++ b/main/src/com/google/refine/protograph/CellTopicNode.java @@ -0,0 +1,56 @@ +package com.google.refine.protograph; + +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +public class CellTopicNode extends CellNode implements NodeWithLinks { + final public FreebaseType type; + final public List links = new LinkedList(); + + public CellTopicNode( + FreebaseType type + ) { + this.type = type; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("nodeType"); writer.value("cell-as-topic"); + writer.key("columnNames"); + writer.array(); + for (String name : columnNames) { + writer.value(name); + } + writer.endArray(); + if (type != null) { + writer.key("type"); type.write(writer, options); + } + if (links != null) { + writer.key("links"); writer.array(); + for (Link link : links) { + link.write(writer, options); + } + writer.endArray(); + } + + writer.endObject(); + } + + public void addLink(Link link) { + links.add(link); + } + + public Link getLink(int index) { + return links.get(index); + } + + public int getLinkCount() { + return links.size(); + } +} diff --git a/main/src/com/google/refine/protograph/CellValueNode.java b/main/src/com/google/refine/protograph/CellValueNode.java new file mode 100644 index 000000000..efd02bf20 --- /dev/null +++ b/main/src/com/google/refine/protograph/CellValueNode.java @@ -0,0 +1,36 @@ +package 
com.google.refine.protograph; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +public class CellValueNode extends CellNode { + final public String valueType; + final public String lang; + + public CellValueNode( + String valueType, + String lang + ) { + this.valueType = valueType; + this.lang = lang; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("nodeType"); writer.value("cell-as-value"); + writer.key("columnNames"); + writer.array(); + for (String name : columnNames) { + writer.value(name); + } + writer.endArray(); + writer.key("valueType"); writer.value(valueType); + writer.key("lang"); writer.value(lang); + writer.endObject(); + } + +} diff --git a/main/src/com/google/refine/protograph/Condition.java b/main/src/com/google/refine/protograph/Condition.java new file mode 100644 index 000000000..11f2f8d00 --- /dev/null +++ b/main/src/com/google/refine/protograph/Condition.java @@ -0,0 +1,9 @@ +package com.google.refine.protograph; + +import com.google.refine.Jsonizable; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public interface Condition extends Jsonizable { + public boolean test(Project project, int rowIndex, Row row); +} diff --git a/main/src/com/google/refine/protograph/FreebaseProperty.java b/main/src/com/google/refine/protograph/FreebaseProperty.java new file mode 100644 index 000000000..9a9110350 --- /dev/null +++ b/main/src/com/google/refine/protograph/FreebaseProperty.java @@ -0,0 +1,9 @@ +package com.google.refine.protograph; + +public class FreebaseProperty extends FreebaseTopic { + //final protected FreebaseType _expectedType; + + public FreebaseProperty(String id, String name) { + super(id, name); + } +} diff --git a/main/src/com/google/refine/protograph/FreebaseTopic.java b/main/src/com/google/refine/protograph/FreebaseTopic.java new file mode 100644 index 000000000..dd0bc5bba --- /dev/null 
+++ b/main/src/com/google/refine/protograph/FreebaseTopic.java @@ -0,0 +1,28 @@ +package com.google.refine.protograph; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; + +public class FreebaseTopic implements Jsonizable { + final public String id; + final public String name; + + public FreebaseTopic(String id, String name) { + this.id = id; + this.name = name; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(id); + writer.key("name"); writer.value(name); + writer.endObject(); + } + +} diff --git a/main/src/com/google/refine/protograph/FreebaseTopicNode.java b/main/src/com/google/refine/protograph/FreebaseTopicNode.java new file mode 100644 index 000000000..7d2774b9f --- /dev/null +++ b/main/src/com/google/refine/protograph/FreebaseTopicNode.java @@ -0,0 +1,46 @@ +package com.google.refine.protograph; + +import java.util.LinkedList; +import java.util.List; +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +public class FreebaseTopicNode implements Node, NodeWithLinks { + final public FreebaseTopic topic; + final public List links = new LinkedList(); + + public FreebaseTopicNode(FreebaseTopic topic) { + this.topic = topic; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("nodeType"); writer.value("topic"); + writer.key("topic"); topic.write(writer, options); + if (links != null) { + writer.key("links"); writer.array(); + for (Link link : links) { + link.write(writer, options); + } + writer.endArray(); + } + + writer.endObject(); + } + + public void addLink(Link link) { + links.add(link); + } + + public Link getLink(int index) { + return links.get(index); + } + + public int getLinkCount() { + return links.size(); + } +} diff --git 
a/main/src/com/google/refine/protograph/FreebaseType.java b/main/src/com/google/refine/protograph/FreebaseType.java new file mode 100644 index 000000000..67282c71d --- /dev/null +++ b/main/src/com/google/refine/protograph/FreebaseType.java @@ -0,0 +1,36 @@ +package com.google.refine.protograph; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; + +public class FreebaseType extends FreebaseTopic implements Jsonizable { + public FreebaseType(String id, String name) { + super(id, name); + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("id"); writer.value(id); + writer.key("name"); writer.value(name); + writer.endObject(); + } + + static public FreebaseType load(JSONObject obj) throws Exception { + if (obj == null) { + return null; + } + + FreebaseType type = new FreebaseType( + obj.getString("id"), + obj.getString("name") + ); + return type; + } +} diff --git a/main/src/com/google/refine/protograph/Link.java b/main/src/com/google/refine/protograph/Link.java new file mode 100644 index 000000000..fb0813352 --- /dev/null +++ b/main/src/com/google/refine/protograph/Link.java @@ -0,0 +1,47 @@ +package com.google.refine.protograph; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.Jsonizable; + +public class Link implements Jsonizable { + final public FreebaseProperty property; + final public Node target; + final public Condition condition; + final public boolean load; + + public Link(FreebaseProperty property, Node target, Condition condition, boolean load) { + this.property = property; + this.target = target; + this.condition = condition; + this.load = load; + } + + public FreebaseProperty getProperty() { + return property; + } + + public Node getTarget() { + return target; + } + + public void write(JSONWriter 
writer, Properties options)
+        throws JSONException {
+
+        writer.object();
+        writer.key("property"); property.write(writer, options);
+        if (target != null) {
+            writer.key("target");
+            target.write(writer, options);
+        }
+        if (condition != null) {
+            writer.key("condition");
+            condition.write(writer, options);
+        }
+        writer.endObject();
+    }
+
+}
diff --git a/main/src/com/google/refine/protograph/Node.java b/main/src/com/google/refine/protograph/Node.java
new file mode 100644
index 000000000..5ccd8b598
--- /dev/null
+++ b/main/src/com/google/refine/protograph/Node.java
@@ -0,0 +1,6 @@
+package com.google.refine.protograph;
+
+import com.google.refine.Jsonizable;
+/** A node of a protograph; declares nothing beyond Jsonizable. */
+public interface Node extends Jsonizable {
+}
diff --git a/main/src/com/google/refine/protograph/NodeWithLinks.java b/main/src/com/google/refine/protograph/NodeWithLinks.java
new file mode 100644
index 000000000..ed32a3b73
--- /dev/null
+++ b/main/src/com/google/refine/protograph/NodeWithLinks.java
@@ -0,0 +1,9 @@
+package com.google.refine.protograph;
+/** A node that owns a collection of outgoing links addressable by index. */
+public interface NodeWithLinks {
+    public void addLink(Link link);
+
+    public int getLinkCount();
+
+    public Link getLink(int index);
+}
diff --git a/main/src/com/google/refine/protograph/Protograph.java b/main/src/com/google/refine/protograph/Protograph.java
new file mode 100644
index 000000000..7e4fc43d3
--- /dev/null
+++ b/main/src/com/google/refine/protograph/Protograph.java
@@ -0,0 +1,168 @@
+package com.google.refine.protograph;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Properties;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONWriter;
+
+import com.google.refine.model.OverlayModel;
+import com.google.refine.model.Project;
+/** Overlay model that holds the root nodes of a protograph and converts them to and from JSON. */
+public class Protograph implements OverlayModel {
+    final protected List<Node> _rootNodes = new LinkedList<Node>();
+    /** Returns the number of root nodes. */
+    public int getRootNodeCount() {
+        return _rootNodes.size();
+    }
+    /** Returns the root node at the given position. */
+    public Node getRootNode(int index) {
+        return
_rootNodes.get(index);
+    }
+
+    @Override
+    public void onBeforeSave() {
+    }
+
+    @Override
+    public void onAfterSave() {
+    }
+
+
+    @Override
+    public void dispose() {
+    }
+    /** Builds a Protograph from the "rootNodes" array of o; entries that reconstruct to null are skipped. */
+    static public Protograph reconstruct(JSONObject o) throws JSONException {
+        Protograph g = new Protograph();
+
+        JSONArray rootNodes = o.getJSONArray("rootNodes");
+        int count = rootNodes.length();
+
+        for (int i = 0; i < count; i++) {
+            JSONObject o2 = rootNodes.getJSONObject(i);
+            Node node = reconstructNode(o2);
+            if (node != null) {
+                g._rootNodes.add(node);
+            }
+        }
+
+        return g;
+    }
+    /** Rebuilds one node (and, recursively, its links) from JSON; returns null for an unrecognized "nodeType". */
+    static protected Node reconstructNode(JSONObject o) throws JSONException {
+        Node node = null;
+
+        String nodeType = o.getString("nodeType");
+        if (nodeType.startsWith("cell-as-")) {
+            if ("cell-as-topic".equals(nodeType)) {
+                // "type" is optional; without it the node used to stay null and crash below.
+                node = new CellTopicNode(
+                    o.has("type") && !o.isNull("type")
+                        ? reconstructType(o.getJSONObject("type"))
+                        : null);
+            } else if ("cell-as-value".equals(nodeType)) {
+                node = new CellValueNode(
+                    o.getString("valueType"),
+                    o.getString("lang")
+                );
+            } else if ("cell-as-key".equals(nodeType)) {
+                node = new CellKeyNode(
+                    reconstructTopic(o.getJSONObject("namespace"))
+                );
+            }
+            // node is still null for an unknown cell-as-* subtype; skip the column names then.
+            if (node != null && o.has("columnName") && !o.isNull("columnName")) {
+                ((CellNode) node).columnNames.add(o.getString("columnName"));
+            }
+            if (node != null && o.has("columnNames") && !o.isNull("columnNames")) {
+                JSONArray columnNames = o.getJSONArray("columnNames");
+                int count = columnNames.length();
+
+                for (int c = 0; c < count; c++) {
+                    ((CellNode) node).columnNames.add(columnNames.getString(c));
+                }
+            }
+        } else if ("topic".equals(nodeType)) {
+            node = new FreebaseTopicNode(reconstructTopic(o.getJSONObject("topic")));
+        } else if ("value".equals(nodeType)) {
+            node = new ValueNode(
+                o.get("value"),
+                o.getString("valueType"),
+                o.getString("lang")
+            );
+        } else if ("anonymous".equals(nodeType)) {
+            node = new AnonymousNode(reconstructType(o.getJSONObject("type")));
+        }
+
+        if (node != null && node instanceof NodeWithLinks && o.has("links")) {
+
NodeWithLinks node2 = (NodeWithLinks) node; + + JSONArray links = o.getJSONArray("links"); + int linkCount = links.length(); + + for (int j = 0; j < linkCount; j++) { + JSONObject oLink = links.getJSONObject(j); + Condition condition = null; + + if (oLink.has("condition") && !oLink.isNull("condition")) { + JSONObject oCondition = oLink.getJSONObject("condition"); + if (oCondition.has("columnName") && !oCondition.isNull("columnName")) { + condition = new BooleanColumnCondition(oCondition.getString("columnName")); + } + } + + node2.addLink(new Link( + reconstructProperty(oLink.getJSONObject("property")), + oLink.has("target") && !oLink.isNull("target") ? + reconstructNode(oLink.getJSONObject("target")) : null, + condition, + oLink.has("load") && !oLink.isNull("load") ? + oLink.getBoolean("load") : true + )); + } + } + + return node; + } + + static protected FreebaseProperty reconstructProperty(JSONObject o) throws JSONException { + return new FreebaseProperty( + o.getString("id"), + o.getString("name") + ); + } + + static protected FreebaseType reconstructType(JSONObject o) throws JSONException { + return new FreebaseType( + o.getString("id"), + o.getString("name") + ); + } + + static protected FreebaseTopic reconstructTopic(JSONObject o) throws JSONException { + return new FreebaseTopic( + o.getString("id"), + o.getString("name") + ); + } + + public void write(JSONWriter writer, Properties options) throws JSONException { + writer.object(); + writer.key("rootNodes"); writer.array(); + + for (Node node : _rootNodes) { + node.write(writer, options); + } + + writer.endArray(); + writer.endObject(); + } + + static public Protograph load(Project project, JSONObject obj) throws Exception { + return reconstruct(obj); + } +} diff --git a/main/src/com/google/refine/protograph/ValueNode.java b/main/src/com/google/refine/protograph/ValueNode.java new file mode 100644 index 000000000..ced705021 --- /dev/null +++ b/main/src/com/google/refine/protograph/ValueNode.java @@ -0,0 
+1,29 @@ +package com.google.refine.protograph; + +import java.util.Properties; + +import org.json.JSONException; +import org.json.JSONWriter; + +public class ValueNode implements Node { + final public Object value; + final public String valueType; + final public String lang; + + public ValueNode(Object value, String valueType, String lang) { + this.value = value; + this.valueType = valueType; + this.lang = lang; + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + writer.key("nodeType"); writer.value("value"); + writer.key("value"); writer.value(value); + writer.key("valueType"); writer.value(valueType); + writer.key("lang"); writer.value(lang); + writer.endObject(); + } +} diff --git a/main/src/com/google/refine/protograph/transpose/MqlwriteLikeTransposedNodeFactory.java b/main/src/com/google/refine/protograph/transpose/MqlwriteLikeTransposedNodeFactory.java new file mode 100644 index 000000000..a03c743fd --- /dev/null +++ b/main/src/com/google/refine/protograph/transpose/MqlwriteLikeTransposedNodeFactory.java @@ -0,0 +1,346 @@ +package com.google.refine.protograph.transpose; + +import java.io.IOException; +import java.io.Writer; +import java.util.LinkedList; +import java.util.List; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.model.Cell; +import com.google.refine.model.Recon; +import com.google.refine.protograph.AnonymousNode; +import com.google.refine.protograph.CellKeyNode; +import com.google.refine.protograph.CellNode; +import com.google.refine.protograph.CellTopicNode; +import com.google.refine.protograph.CellValueNode; +import com.google.refine.protograph.FreebaseProperty; +import com.google.refine.protograph.FreebaseTopicNode; +import com.google.refine.protograph.Link; +import com.google.refine.protograph.ValueNode; +import com.google.refine.util.JSONUtilities; + +public class 
MqlwriteLikeTransposedNodeFactory implements TransposedNodeFactory { + protected Writer writer; + protected List rootObjects = new LinkedList(); + + private static final String TYPE = "type"; + private static final String ID = "id"; + private static final String NAME = "name"; + private static final String CREATE = "create"; + private static final String VALUE = "value"; + private static final String CONNECT = "connect"; + private static final String LANG = "lang"; + + public MqlwriteLikeTransposedNodeFactory(Writer writer) { + this.writer = writer; + } + + protected JSONArray getJSON() { + return new JSONArray(rootObjects); + } + + @Override + public void flush() throws IOException { + try { + JSONWriter jsonWriter = new JSONWriter(writer); + + jsonWriter.array(); + for (JSONObject obj : rootObjects) { + jsonWriter.value(obj); + } + jsonWriter.endArray(); + + } catch (JSONException e) { + e.printStackTrace(); + } + writer.flush(); + } + + abstract protected class JsonTransposedNode implements TransposedNode { + abstract public Object getJSON(); + } + + abstract protected class JsonObjectTransposedNode extends JsonTransposedNode { + abstract public JSONObject getJSONObject(); + + protected JSONObject obj; + + public Object getJSON() { + return getJSONObject(); + } + } + + protected class AnonymousTransposedNode extends JsonObjectTransposedNode { + JsonObjectTransposedNode parent; + FreebaseProperty property; + AnonymousNode node; + + protected AnonymousTransposedNode( + JsonObjectTransposedNode parent, + FreebaseProperty property, + AnonymousNode node + ) { + this.parent = parent; + this.property = property; + this.node = node; + } + + public JSONObject getJSONObject() { + if (obj == null) { + obj = new JSONObject(); + try { + obj.put(TYPE, this.node.type.id); + obj.put(ID, (String) null); + obj.put(CREATE, "unconditional"); + } catch (JSONException e) { + e.printStackTrace(); + } + + linkTransposedNodeJSON(obj, parent, property); + } + + return obj; + } + } + + 
protected class CellTopicTransposedNode extends JsonObjectTransposedNode { + protected CellTopicNode node; + protected Cell cell; + + public CellTopicTransposedNode(CellTopicNode node, Cell cell) { + this.node = node; + this.cell = cell; + } + + @Override + public JSONObject getJSONObject() { + if (obj == null) { + obj = new JSONObject(); + try { + if (cell.recon != null && + cell.recon.judgment == Recon.Judgment.Matched && + cell.recon.match != null) { + obj.put(ID, cell.recon.match.id); + } else { + obj.put(ID, (String) null); + obj.put(NAME, cell.value.toString()); + obj.put(TYPE, node.type.id); + obj.put(CREATE, "unless_exists"); + } + } catch (JSONException e) { + e.printStackTrace(); + } + } + return obj; + } + } + + protected class CellValueTransposedNode extends JsonTransposedNode { + protected JSONObject obj; + protected CellValueNode node; + protected Cell cell; + + public CellValueTransposedNode(CellValueNode node, Cell cell) { + this.node = node; + this.cell = cell; + } + + public Object getJSON() { + if (obj == null) { + obj = new JSONObject(); + try { + JSONUtilities.putField(obj, VALUE, cell.value); + + obj.put(TYPE, node.valueType); + if ("/type/text".equals(node.valueType)) { + obj.put(LANG, node.lang); + } + + obj.put(CONNECT, "insert"); + } catch (JSONException e) { + e.printStackTrace(); + } + } + return obj; + } + } + + protected class CellKeyTransposedNode extends JsonTransposedNode { + protected JSONObject obj; + protected CellKeyNode node; + protected Cell cell; + + public CellKeyTransposedNode(CellKeyNode node, Cell cell) { + this.node = node; + this.cell = cell; + } + + public Object getJSON() { + if (obj == null) { + obj = new JSONObject(); + try { + obj.put(VALUE, cell.value.toString()); + + JSONObject nsObj = new JSONObject(); + nsObj.put(ID, node.namespace.id); + + obj.put("namespace", nsObj); + obj.put(CONNECT, "insert"); + } catch (JSONException e) { + e.printStackTrace(); + } + } + return obj; + } + } + + protected class 
TopicTransposedNode extends JsonObjectTransposedNode { + protected FreebaseTopicNode node; + + public TopicTransposedNode(FreebaseTopicNode node) { + this.node = node; + } + + @Override + public JSONObject getJSONObject() { + if (obj == null) { + obj = new JSONObject(); + try { + obj.put(ID, node.topic.id); + } catch (JSONException e) { + e.printStackTrace(); + } + } + return obj; + } + } + + protected class ValueTransposedNode extends JsonTransposedNode { + protected JSONObject obj; + protected ValueNode node; + + public ValueTransposedNode(ValueNode node) { + this.node = node; + } + + public Object getJSON() { + if (obj == null) { + obj = new JSONObject(); + try { + obj.put(VALUE, node.value); + obj.put(TYPE, node.valueType); + if ("/type/text".equals(node.valueType)) { + obj.put(LANG, node.lang); + } + + obj.put(CONNECT, "insert"); + } catch (JSONException e) { + e.printStackTrace(); + } + } + return obj; + } + } + public TransposedNode transposeAnonymousNode( + TransposedNode parentNode, + Link link, + AnonymousNode node, int rowIndex) { + + return new AnonymousTransposedNode( + parentNode instanceof JsonObjectTransposedNode ? (JsonObjectTransposedNode) parentNode : null, + link != null ? link.property : null, + node + ); + } + + public TransposedNode transposeCellNode( + TransposedNode parentNode, + Link link, + CellNode node, + int rowIndex, + int cellIndex, + Cell cell) { + + JsonTransposedNode tnode = null; + if (node instanceof CellTopicNode) { + tnode = new CellTopicTransposedNode((CellTopicNode) node, cell); + } else if (node instanceof CellValueNode) { + tnode = new CellValueTransposedNode((CellValueNode) node, cell); + } else if (node instanceof CellKeyNode) { + tnode = new CellKeyTransposedNode((CellKeyNode) node, cell); + } + + if (tnode != null) { + processTransposedNode(tnode, parentNode, link != null ? 
link.property : null); + } + return tnode; + } + + public TransposedNode transposeTopicNode( + TransposedNode parentNode, + Link link, + FreebaseTopicNode node, int rowIndex) { + + JsonTransposedNode tnode = new TopicTransposedNode(node); + + processTransposedNode(tnode, parentNode, link != null ? link.property : null); + + return tnode; + } + + public TransposedNode transposeValueNode( + TransposedNode parentNode, + Link link, + ValueNode node, int rowIndex) { + + JsonTransposedNode tnode = new ValueTransposedNode(node); + + processTransposedNode(tnode, parentNode, link != null ? link.property : null); + + return tnode; + } + + protected void processTransposedNode( + JsonTransposedNode tnode, + TransposedNode parentNode, + FreebaseProperty property + ) { + + if (!(tnode instanceof AnonymousTransposedNode)) { + linkTransposedNodeJSON(tnode.getJSON(), parentNode, property); + } + } + + protected void linkTransposedNodeJSON( + Object obj, + TransposedNode parentNode, + FreebaseProperty property + ) { + + if (parentNode == null) { + if (obj instanceof JSONObject) { + rootObjects.add((JSONObject) obj); + } + } else if (parentNode instanceof JsonTransposedNode) { + JSONObject parentObj = ((JsonObjectTransposedNode) parentNode).getJSONObject(); + + try { + JSONArray a = null; + if (parentObj.has(property.id)) { + a = parentObj.getJSONArray(property.id); + } else { + a = new JSONArray(); + parentObj.put(property.id, a); + } + + a.put(a.length(), obj); + } catch (JSONException e) { + e.printStackTrace(); + } + } + } +} diff --git a/main/src/com/google/refine/protograph/transpose/TransposedNode.java b/main/src/com/google/refine/protograph/transpose/TransposedNode.java new file mode 100644 index 000000000..437b5b9d8 --- /dev/null +++ b/main/src/com/google/refine/protograph/transpose/TransposedNode.java @@ -0,0 +1,4 @@ +package com.google.refine.protograph.transpose; + +public interface TransposedNode { +} diff --git 
a/main/src/com/google/refine/protograph/transpose/TransposedNodeFactory.java b/main/src/com/google/refine/protograph/transpose/TransposedNodeFactory.java new file mode 100644 index 000000000..92716beba --- /dev/null +++ b/main/src/com/google/refine/protograph/transpose/TransposedNodeFactory.java @@ -0,0 +1,43 @@ +package com.google.refine.protograph.transpose; + +import java.io.IOException; + +import com.google.refine.model.Cell; +import com.google.refine.protograph.AnonymousNode; +import com.google.refine.protograph.CellNode; +import com.google.refine.protograph.FreebaseTopicNode; +import com.google.refine.protograph.Link; +import com.google.refine.protograph.ValueNode; + +public interface TransposedNodeFactory { + public TransposedNode transposeAnonymousNode( + TransposedNode parentNode, + Link link, + AnonymousNode node, int rowIndex + ); + + public TransposedNode transposeCellNode( + TransposedNode parentNode, + Link link, + CellNode node, + int rowIndex, + int cellIndex, + Cell cell + ); + + public TransposedNode transposeValueNode( + TransposedNode parentNode, + Link link, + ValueNode node, + int rowIndex + ); + + public TransposedNode transposeTopicNode( + TransposedNode parentNode, + Link link, + FreebaseTopicNode node, + int rowIndex + ); + + public void flush() throws IOException; +} diff --git a/main/src/com/google/refine/protograph/transpose/Transposer.java b/main/src/com/google/refine/protograph/transpose/Transposer.java new file mode 100644 index 000000000..b6f288b4b --- /dev/null +++ b/main/src/com/google/refine/protograph/transpose/Transposer.java @@ -0,0 +1,222 @@ +package com.google.refine.protograph.transpose; + +import java.util.LinkedList; +import java.util.List; + +import com.google.refine.browsing.FilteredRows; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import 
com.google.refine.model.Row; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.protograph.AnonymousNode; +import com.google.refine.protograph.CellNode; +import com.google.refine.protograph.CellTopicNode; +import com.google.refine.protograph.FreebaseTopicNode; +import com.google.refine.protograph.Link; +import com.google.refine.protograph.Node; +import com.google.refine.protograph.NodeWithLinks; +import com.google.refine.protograph.Protograph; +import com.google.refine.protograph.ValueNode; + +public class Transposer { + static public void transpose( + Project project, + FilteredRows filteredRows, + Protograph protograph, + Node rootNode, + TransposedNodeFactory nodeFactory + ) { + transpose(project, filteredRows, protograph, rootNode, nodeFactory, 20); + } + + static public void transpose( + Project project, + FilteredRows filteredRows, + Protograph protograph, + Node rootNode, + TransposedNodeFactory nodeFactory, + int limit + ) { + Context rootContext = new Context(rootNode, null, null, limit); + + filteredRows.accept(project, new RowVisitor() { + Context rootContext; + Protograph protograph; + Node rootNode; + TransposedNodeFactory nodeFactory; + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + if (rootContext.limit <= 0 || rootContext.count < rootContext.limit) { + descend(project, protograph, nodeFactory, rowIndex, row, rootNode, rootContext); + } + + if (rootContext.limit > 0 && rootContext.count > rootContext.limit) { + return true; + } + return false; + } + + @Override + public void start(Project project) { + // TODO Auto-generated method stub + + } + + @Override + public void end(Project project) { + // TODO Auto-generated method stub + + } + + public RowVisitor init( + Context rootContext, + Protograph protograph, + Node rootNode, + TransposedNodeFactory nodeFactory + ) { + this.rootContext = rootContext; + this.protograph = protograph; + this.rootNode = rootNode; + this.nodeFactory = nodeFactory; + + 
return this; + } + }.init(rootContext, protograph, rootNode, nodeFactory)); + } + + static protected void descend( + Project project, + Protograph protograph, + TransposedNodeFactory nodeFactory, + int rowIndex, + Row row, + Node node, + Context context + ) { + List tnodes = new LinkedList(); + + TransposedNode parentNode = context.parent == null ? null : context.parent.transposedNode; + Link link = context.parent == null ? null : context.link; + + if (node instanceof CellNode) { + CellNode node2 = (CellNode) node; + for (String columnName : node2.columnNames) { + Column column = project.columnModel.getColumnByName(columnName); + if (column != null) { + int cellIndex = column.getCellIndex(); + + Cell cell = row.getCell(cellIndex); + if (cell != null && ExpressionUtils.isNonBlankData(cell.value)) { + if (node2 instanceof CellTopicNode && + (cell.recon == null || cell.recon.judgment == Judgment.None)) { + return; + } + + context.count++; + if (context.limit > 0 && context.count > context.limit) { + return; + } + + tnodes.add(nodeFactory.transposeCellNode( + parentNode, + link, + node2, + rowIndex, + cellIndex, + cell + )); + } + } + } + } else { + if (node instanceof AnonymousNode) { + tnodes.add(nodeFactory.transposeAnonymousNode( + parentNode, + link, + (AnonymousNode) node, + rowIndex + )); + } else if (node instanceof FreebaseTopicNode) { + tnodes.add(nodeFactory.transposeTopicNode( + parentNode, + link, + (FreebaseTopicNode) node, + rowIndex + )); + } else if (node instanceof ValueNode) { + tnodes.add(nodeFactory.transposeValueNode( + parentNode, + link, + (ValueNode) node, + rowIndex + )); + } + } + + if (node instanceof NodeWithLinks) { + NodeWithLinks node2 = (NodeWithLinks) node; + int linkCount = node2.getLinkCount(); + + for (int i = 0; i < linkCount; i++) { + Link link2 = node2.getLink(i); + if (link2.condition == null || link2.condition.test(project, rowIndex, row)) { + for (TransposedNode tnode : tnodes) { + context.transposedNode = tnode; + 
context.nullifySubContextNodes(); + + descend( + project, + protograph, + nodeFactory, + rowIndex, + row, + link2.getTarget(), + context.subContexts.get(i) + ); + } + } + } + } + } + + static class Context { + TransposedNode transposedNode; + List subContexts; + Context parent; + Link link; + int count; + int limit; + + Context(Node node, Context parent, Link link, int limit) { + this.parent = parent; + this.link = link; + this.limit = limit; + + if (node instanceof NodeWithLinks) { + NodeWithLinks node2 = (NodeWithLinks) node; + + int subContextCount = node2.getLinkCount(); + + subContexts = new LinkedList(); + for (int i = 0; i < subContextCount; i++) { + Link link2 = node2.getLink(i); + subContexts.add( + new Context(link2.getTarget(), this, link2, -1)); + } + } + } + + public void nullifySubContextNodes() { + if (subContexts != null) { + for (Context context : subContexts) { + context.transposedNode = null; + context.nullifySubContextNodes(); + } + } + } + } +} diff --git a/main/src/com/google/refine/protograph/transpose/TripleLoaderTransposedNodeFactory.java b/main/src/com/google/refine/protograph/transpose/TripleLoaderTransposedNodeFactory.java new file mode 100644 index 000000000..753528768 --- /dev/null +++ b/main/src/com/google/refine/protograph/transpose/TripleLoaderTransposedNodeFactory.java @@ -0,0 +1,725 @@ +package com.google.refine.protograph.transpose; + +import java.io.IOException; +import java.io.Writer; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Recon; +import com.google.refine.model.Recon.Judgment; +import com.google.refine.model.recon.ReconConfig; +import 
com.google.refine.model.recon.StandardReconConfig; +import com.google.refine.protograph.AnonymousNode; +import com.google.refine.protograph.CellKeyNode; +import com.google.refine.protograph.CellNode; +import com.google.refine.protograph.CellTopicNode; +import com.google.refine.protograph.CellValueNode; +import com.google.refine.protograph.FreebaseProperty; +import com.google.refine.protograph.FreebaseTopic; +import com.google.refine.protograph.FreebaseTopicNode; +import com.google.refine.protograph.Link; +import com.google.refine.protograph.ValueNode; + +public class TripleLoaderTransposedNodeFactory implements TransposedNodeFactory { + protected Project project; + + protected boolean start = true; + protected Writer writer; + protected WritingTransposedNode lastRootNode; + protected Map varPool = new HashMap(); + protected Map newTopicVars = new HashMap(); + protected Set serializedRecons = new HashSet(); + + protected long contextID = 0; + protected int contextRowIndex; + protected int contextRefCount = 0; + protected JSONObject contextTreeRoot; + + public TripleLoaderTransposedNodeFactory(Project project, Writer writer) { + this.project = project; + this.writer = writer; + } + + @Override + public void flush() throws IOException { + if (lastRootNode != null) { + lastRootNode.write(null, null, project, -1, -1, null); + lastRootNode = null; + + writeContextTreeNode(); + } + } + + protected void writeLine(String line) { + try { + if (start) { + start = false; + } else { + writer.write('\n'); + } + writer.write(line); + } catch (IOException e) { + // ignore + } + } + + protected void writeRecon( + StringBuffer sb, + Project project, + int rowIndex, + int cellIndex, + Cell cell + ) { + Recon recon = cell.recon; + + sb.append("\"rec"); sb.append(Long.toString(recon.id)); sb.append("\""); + contextRefCount++; + + if (!serializedRecons.contains(recon.id)) { + serializedRecons.add(recon.id); + + Column column = project.columnModel.getColumnByCellIndex(cellIndex); + + // 
qa:sample_group + { + StringBuffer sb2 = new StringBuffer(); + + sb2.append("{ \"s\" : \"rec"); + sb2.append(Long.toString(recon.id)); + sb2.append("\", \"p\" : \"qa:sample_group\", \"o\" : "); + sb2.append(JSONObject.quote(column.getName())); + sb2.append(", \"ignore\" : true }"); + + writeLine(sb2.toString()); + } + + // qa:recon_data + { + StringBuffer sb2 = new StringBuffer(); + + String s = cell.value instanceof String ? (String) cell.value : cell.value.toString(); + + sb2.append("{ \"s\" : \"rec"); + sb2.append(Long.toString(recon.id)); + sb2.append("\", \"p\" : \"qa:recon_data\", \"ignore\" : true, \"o\" : { "); + + sb2.append(" \"history_entry\" : "); sb2.append(Long.toString(recon.judgmentHistoryEntry)); + sb2.append(", \"text\" : "); sb2.append(JSONObject.quote(s)); + sb2.append(", \"column\" : "); sb2.append(JSONObject.quote(column.getName())); + sb2.append(", \"service\" : "); sb2.append(JSONObject.quote(recon.service)); + sb2.append(", \"action\" : "); sb2.append(JSONObject.quote(recon.judgmentAction)); + sb2.append(", \"batch\" : "); sb2.append(Integer.toString(recon.judgmentBatchSize)); + + if (recon.judgment == Judgment.Matched) { + sb2.append(", \"matchRank\" : "); sb2.append(Integer.toString(recon.matchRank)); + sb2.append(", \"id\" : "); sb2.append(JSONObject.quote(recon.match.id)); + } + + ReconConfig reconConfig = column.getReconConfig(); + if (reconConfig != null && reconConfig instanceof StandardReconConfig) { + StandardReconConfig standardReconConfig = (StandardReconConfig) reconConfig; + sb2.append(", \"type\" : "); sb2.append(JSONObject.quote(standardReconConfig.typeID)); + } + + sb2.append(" } }"); + + writeLine(sb2.toString()); + } + } + } + + protected void writeLine( + String subject, String predicate, Object object, + Project project, + int subjectRowIndex, int subjectCellIndex, Cell subjectCell, + int objectRowIndex, int objectCellIndex, Cell objectCell, + boolean ignore + ) { + if (subject != null && object != null) { + String s = 
object instanceof String ? + JSONObject.quote((String) object) : object.toString(); + + StringBuffer sb = new StringBuffer(); + sb.append("{ \"s\" : \""); sb.append(subject); sb.append('"'); + sb.append(", \"p\" : \""); sb.append(predicate); sb.append('"'); + sb.append(", \"o\" : "); sb.append(s); + if (subjectCell != null || objectCell != null) { + sb.append(", \"meta\" : { "); + + sb.append("\"recon\" : { "); + if (subjectCell != null) { + sb.append("\"s\" : "); + writeRecon(sb, project, subjectRowIndex, subjectCellIndex, subjectCell); + } + if (objectCell != null) { + if (subjectCell != null) { + sb.append(", "); + } + sb.append("\"o\" : "); + writeRecon(sb, project, objectRowIndex, objectCellIndex, objectCell); + } + sb.append(" }"); + + sb.append(" }"); + } + if (ignore) { + sb.append(", \"ignore\" : true"); + } + sb.append(" }"); + + writeLine(sb.toString()); + } + } + + protected void writeLine( + String subject, String predicate, Object object, String lang, + Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell, + boolean ignore + ) { + if (subject != null && object != null) { + String s = object instanceof String ? 
+ JSONObject.quote((String) object) : object.toString(); + + StringBuffer sb = new StringBuffer(); + sb.append("{ \"s\" : \""); sb.append(subject); sb.append('"'); + sb.append(", \"p\" : \""); sb.append(predicate); sb.append('"'); + sb.append(", \"o\" : "); sb.append(s); + sb.append(", \"lang\" : "); sb.append(lang); + + if (subjectCell != null) { + sb.append(", \"meta\" : { "); + sb.append("\"recon\" : { "); + sb.append("\"s\" : "); + writeRecon(sb, project, subjectRowIndex, subjectCellIndex, subjectCell); + sb.append(" }"); + sb.append(" }"); + } + if (ignore) { + sb.append(", \"ignore\" : true"); + } + sb.append(" }"); + + writeLine(sb.toString()); + } + } + + abstract protected class WritingTransposedNode implements TransposedNode { + JSONObject jsonContextNode; + boolean load; + + public Object write( + String subject, String predicate, Project project, + int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { + + return internalWrite( + subject, predicate, project, + subjectRowIndex, subjectCellIndex, subjectCell); + } + + abstract public Object internalWrite( + String subject, String predicate, Project project, + int subjectRowIndex, int subjectCellIndex, Cell subjectCell); + } + + abstract protected class TransposedNodeWithChildren extends WritingTransposedNode { + public List links = new LinkedList(); + public List rowIndices = new LinkedList(); + public List children = new LinkedList(); + + protected void writeChildren( + String subject, Project project, + int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { + + for (int i = 0; i < children.size(); i++) { + WritingTransposedNode child = children.get(i); + Link link = links.get(i); + String predicate = link.property.id; + + child.write(subject, predicate, project, + subjectRowIndex, subjectCellIndex, subjectCell); + } + } + } + + protected class AnonymousTransposedNode extends TransposedNodeWithChildren { + + //protected AnonymousTransposedNode(AnonymousNode node) { } + + public Object 
internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { + if (children.size() == 0 || subject == null) { + return null; + } + + StringBuffer sb = new StringBuffer(); + sb.append("{ \"s\" : \""); sb.append(subject); sb.append('"'); + sb.append(", \"p\" : \""); sb.append(predicate); sb.append('"'); + sb.append(", \"o\" : { "); + + StringBuffer sbRecon = new StringBuffer(); + + boolean first = true; + boolean firstRecon = true; + + if (subjectCell.recon != null) { + sbRecon.append("\"s\" : "); + writeRecon(sbRecon, project, subjectRowIndex, subjectCellIndex, subjectCell); + + firstRecon = false; + } + + for (int i = 0; i < children.size(); i++) { + WritingTransposedNode child = children.get(i); + Link link = links.get(i); + + FreebaseProperty property = link.property; + + Object c = child.internalWrite(null, null, project, subjectRowIndex, subjectCellIndex, null); + if (c != null) { + if (first) { + first = false; + } else { + sb.append(", "); + } + sb.append("\"" + property.id + "\": "); + sb.append(c instanceof String ? 
JSONObject.quote((String) c) : c.toString()); + } + + if (child instanceof CellTopicTransposedNode) { + CellTopicTransposedNode child2 = (CellTopicTransposedNode) child; + Recon recon = child2.cell.recon; + + if (recon != null && + (recon.judgment == Judgment.Matched || recon.judgment == Judgment.New)) { + + if (firstRecon) { + firstRecon = false; + } else { + sbRecon.append(", "); + } + + sbRecon.append("\""); sbRecon.append(property.id); sbRecon.append("\" : "); + + writeRecon(sbRecon, project, + rowIndices.get(i), child2.cellIndex, child2.cell); + } + } + } + sb.append(" }, \"meta\" : { \"recon\" : { "); + sb.append(sbRecon.toString()); + sb.append(" } } }"); + + writeLine(sb.toString()); + + return null; + } + } + + protected class CellTopicTransposedNode extends TransposedNodeWithChildren { + protected CellTopicNode node; + protected int rowIndex; + protected int cellIndex; + protected Cell cell; + + public CellTopicTransposedNode(CellTopicNode node, int rowIndex, int cellIndex, Cell cell) { + this.node = node; + this.rowIndex = rowIndex; + this.cellIndex = cellIndex; + this.cell = cell; + } + + public Object internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { + String id = null; + if (cell.recon != null && cell.recon.judgment != Recon.Judgment.None) { + int objectRowIndex = rowIndex; + int objectCellIndex = cellIndex; + Cell objectCell = cell; + + if (cell.recon.judgment == Recon.Judgment.Matched) { + id = cell.recon.match.id; + + } else if (cell.recon.judgment == Judgment.New) { + if (newTopicVars.containsKey(cell.recon.id)) { + id = newTopicVars.get(cell.recon.id); + } else { + Column column = project.columnModel.getColumnByCellIndex(cellIndex); + String columnName = column.getName(); + + long var = 0; + if (varPool.containsKey(columnName)) { + var = varPool.get(columnName); + } + varPool.put(columnName, var + 1); + + id = "$" + columnName.replaceAll("\\W+", "_") + "_" + var; + + 
String typeID = node.type.id; + + ReconConfig reconConfig = column.getReconConfig(); + if (reconConfig instanceof StandardReconConfig) { + typeID = ((StandardReconConfig) reconConfig).typeID; + } + + writeLine(id, "type", typeID, project, rowIndex, cellIndex, cell, -1, -1, (Cell) null, !load); + writeLine(id, "name", cell.value, project, -1, -1, (Cell) null, -1, -1, (Cell) null, !load); + + if (cell.recon != null) { + newTopicVars.put(cell.recon.id, id); + } + } + } else { + return null; + } + + if (subject != null) { + writeLine(subject, predicate, id, project, + subjectRowIndex, subjectCellIndex, subjectCell, + objectRowIndex, objectCellIndex, objectCell, !load); + } + + writeChildren(id, project, objectRowIndex, objectCellIndex, objectCell); + } + + return id; + } + } + + protected class CellValueTransposedNode extends WritingTransposedNode { + protected JSONObject obj; + protected CellValueNode node; + protected int rowIndex; + protected int cellIndex; + protected Cell cell; + + public CellValueTransposedNode(CellValueNode node, int rowIndex, int cellIndex, Cell cell) { + this.node = node; + this.rowIndex = rowIndex; + this.cellIndex = cellIndex; + this.cell = cell; + } + + public Object internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { + if (subject != null) { + if ("/type/text".equals(node.lang)) { + writeLine(subject, predicate, cell.value, node.lang, project, + subjectRowIndex, subjectCellIndex, subjectCell, !load); + } else { + writeLine(subject, predicate, cell.value, project, + subjectRowIndex, subjectCellIndex, subjectCell, + -1, -1, null, !load); + } + } + + return cell.value; + } + } + + protected class CellKeyTransposedNode extends WritingTransposedNode { + protected CellKeyNode node; + protected int rowIndex; + protected int cellIndex; + protected Cell cell; + + public CellKeyTransposedNode(CellKeyNode node, int rowIndex, int cellIndex, Cell cell) { + this.node = node; + 
this.rowIndex = rowIndex; + this.cellIndex = cellIndex; + this.cell = cell; + } + + public Object internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { + writeLine(subject, "key", node.namespace.id + "/" + cell.value, project, + subjectRowIndex, subjectCellIndex, subjectCell, + -1, -1, null, !load); + + return null; + } + } + + protected class TopicTransposedNode extends TransposedNodeWithChildren { + protected FreebaseTopicNode node; + + public TopicTransposedNode(FreebaseTopicNode node) { + this.node = node; + } + + public Object internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { + writeLine(subject, predicate, node.topic.id, project, + subjectRowIndex, subjectCellIndex, subjectCell, + -1, -1, null, !load); + + writeChildren(node.topic.id, project, -1, -1, null); + + return node.topic.id; + } + } + + protected class ValueTransposedNode extends WritingTransposedNode { + protected ValueNode node; + + public ValueTransposedNode(ValueNode node) { + this.node = node; + } + + public Object internalWrite(String subject, String predicate, Project project, int subjectRowIndex, int subjectCellIndex, Cell subjectCell) { + if ("/type/text".equals(node.lang)) { + writeLine(subject, predicate, node.value, node.lang, project, + subjectRowIndex, subjectCellIndex, subjectCell, !load); + } else { + writeLine(subject, predicate, node.value, project, + subjectRowIndex, subjectCellIndex, subjectCell, + -1, -1, null, !load); + } + + return node.value; + } + } + + public TransposedNode transposeAnonymousNode( + TransposedNode parentNode, + Link link, + AnonymousNode node, int rowIndex) { + + WritingTransposedNode parentNode2 = (WritingTransposedNode) parentNode; + WritingTransposedNode tnode = new AnonymousTransposedNode(); + + tnode.load = + (parentNode2 == null || parentNode2.load) && + (link == null || link.load); + + 
processTransposedNode(tnode, parentNode, link, rowIndex); + + tnode.jsonContextNode = addJsonContext( + parentNode2 != null ? parentNode2.jsonContextNode : null, + link != null ? link.property.id : null, + null + ); + + return tnode; + } + + public TransposedNode transposeCellNode( + TransposedNode parentNode, + Link link, + CellNode node, + int rowIndex, + int cellIndex, + Cell cell) { + + WritingTransposedNode parentNode2 = (WritingTransposedNode) parentNode; + + WritingTransposedNode tnode = null; + if (node instanceof CellTopicNode) { + if (cell.recon != null && + (cell.recon.judgment == Judgment.Matched || + cell.recon.judgment == Judgment.New)) { + + tnode = new CellTopicTransposedNode( + (CellTopicNode) node, rowIndex, cellIndex, cell); + } + } else if (node instanceof CellValueNode) { + tnode = new CellValueTransposedNode((CellValueNode) node, rowIndex, cellIndex, cell); + } else if (node instanceof CellKeyNode) { + tnode = new CellKeyTransposedNode((CellKeyNode) node, rowIndex, cellIndex, cell); + } + + if (tnode != null) { + tnode.load = + (parentNode2 == null || parentNode2.load) && + (link == null || link.load); + + processTransposedNode(tnode, parentNode, link, rowIndex); + + tnode.jsonContextNode = addJsonContext( + parentNode2 != null ? parentNode2.jsonContextNode : null, + link != null ? link.property.id : null, + cell, + rowIndex + ); + } + return tnode; + } + + public TransposedNode transposeTopicNode( + TransposedNode parentNode, + Link link, + FreebaseTopicNode node, + int rowIndex) { + + WritingTransposedNode parentNode2 = (WritingTransposedNode) parentNode; + WritingTransposedNode tnode = new TopicTransposedNode(node); + + tnode.load = + (parentNode2 == null || parentNode2.load) && + (link == null || link.load); + + processTransposedNode(tnode, parentNode, link, rowIndex); + + tnode.jsonContextNode = addJsonContext( + parentNode2 != null ? parentNode2.jsonContextNode : null, + link != null ? 
link.property.id : null, + node.topic + ); + + return tnode; + } + + public TransposedNode transposeValueNode( + TransposedNode parentNode, + Link link, + ValueNode node, + int rowIndex) { + + WritingTransposedNode parentNode2 = (WritingTransposedNode) parentNode; + WritingTransposedNode tnode = new ValueTransposedNode(node); + + tnode.load = + (parentNode2 == null || parentNode2.load) && + (link == null || link.load); + + processTransposedNode(tnode, parentNode, link, rowIndex); + + tnode.jsonContextNode = addJsonContext( + parentNode2 != null ? parentNode2.jsonContextNode : null, + link != null ? link.property.id : null, + node.value + ); + + return tnode; + } + + protected void processTransposedNode( + WritingTransposedNode tnode, + TransposedNode parentNode, + Link link, + int rowIndex + ) { + if (parentNode != null) { + if (parentNode instanceof TransposedNodeWithChildren) { + TransposedNodeWithChildren parentNode2 = (TransposedNodeWithChildren) parentNode; + parentNode2.rowIndices.add(rowIndex); + parentNode2.children.add(tnode); + parentNode2.links.add(link); + } + } else { + addRootNode(tnode, rowIndex); + } + } + + protected JSONObject addJsonContext(JSONObject parent, String key, Object value) { + JSONObject o = new JSONObject(); + + try { + if (value instanceof FreebaseTopic) { + FreebaseTopic topic = (FreebaseTopic) value; + o.put("id", topic.id); + o.put("name", topic.name); + } else { + o.put("v", value); + } + } catch (JSONException e) { + // ignore + } + + connectJsonContext(parent, o, key); + return o; + } + + protected JSONObject addJsonContext(JSONObject parent, String key, Cell cell, int rowIndex) { + JSONObject o = new JSONObject(); + + connectJsonContext(parent, o, key); + + try { + if (cell != null) { + o.put("v", cell.value); + if (cell.recon != null) { + o.put("recon", "rec" + cell.recon.id); + + if (cell.recon.judgment == Judgment.Matched) { + o.put("id", cell.recon.match.id); + o.put("name", cell.recon.match.name); + } + + // 
qa:display_context + { + StringBuffer sb2 = new StringBuffer(); + + sb2.append("{ \"ignore\" : true, \"s\" : \"rec"); + sb2.append(Long.toString(cell.recon.id)); + sb2.append("\", \"p\" : \"qa:display_context\", \"o\" : \"ctx"); + sb2.append(Long.toString(contextID)); + sb2.append("\", \"meta\" : { \"row\" : "); + sb2.append(Integer.toString(rowIndex)); + sb2.append(" } }"); + + writeLine(sb2.toString()); + } + } + } + } catch (JSONException e) { + // ignore + } + + return o; + } + + protected void connectJsonContext(JSONObject parent, JSONObject o, String key) { + try { + if (parent == null) { + contextTreeRoot = o; + } else { + JSONArray a = null; + if (parent.has(key)) { + a = parent.getJSONArray(key); + } else { + a = new JSONArray(); + parent.put(key, a); + } + + a.put(o); + } + } catch (JSONException e) { + // ignore + } + } + + protected void addRootNode(WritingTransposedNode tnode, int rowIndex) { + if (lastRootNode != null) { + lastRootNode.write(null, null, project, -1, -1, null); + writeContextTreeNode(); + } + lastRootNode = tnode; + + contextTreeRoot = null; + contextRowIndex = rowIndex; + contextRefCount = 0; + contextID++; + } + + protected void writeContextTreeNode() { + if (contextTreeRoot != null && contextRefCount > 0) { + StringBuffer sb = new StringBuffer(); + + sb.append("{ \"ignore\" : true, \"s\" : \"ctx"); + sb.append(Long.toString(contextID)); + sb.append("\", \"p\" : \"qa:context_data\", \"o\" : { \"row\" : "); + sb.append(Integer.toString(contextRowIndex)); + sb.append(", \"data\" : "); + sb.append(contextTreeRoot.toString()); + sb.append(" } }"); + + writeLine(sb.toString()); + } + } +} diff --git a/main/src/com/google/refine/sorting/BaseSorter.java b/main/src/com/google/refine/sorting/BaseSorter.java new file mode 100644 index 000000000..84799ef30 --- /dev/null +++ b/main/src/com/google/refine/sorting/BaseSorter.java @@ -0,0 +1,140 @@ +package com.google.refine.sorting; + +import java.util.List; + +import org.json.JSONArray; +import 
org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.expr.EvalError; +import com.google.refine.model.Project; +import com.google.refine.sorting.Criterion.KeyMaker; + +abstract public class BaseSorter { + protected Criterion[] _criteria; + protected KeyMaker[] _keyMakers; + protected ComparatorWrapper[] _comparatorWrappers; + protected List _keys; + + public class ComparatorWrapper { + final public int criterionIndex; + final protected int multiplier; + + public ComparatorWrapper(int criterionIndex) { + this.criterionIndex = criterionIndex; + this.multiplier = _criteria[criterionIndex].reverse ? -1 : 1; + } + + public Object getKey(Project project, Object o, int index) { + while (index >= _keys.size()) { + _keys.add(null); + } + + Object[] keys = _keys.get(index); + if (keys == null) { + keys = makeKeys(project, o, index); + _keys.set(index, keys); + } + return keys[criterionIndex]; + } + + public int compare(Project project, Object o1, int i1, Object o2, int i2) { + Criterion c = _criteria[criterionIndex]; + Object key1 = getKey(project, o1, i1); + Object key2 = getKey(project, o2, i2); + + if (key1 == null) { + if (key2 == null) { + return 0; + } else if (key2 instanceof EvalError) { + return c.blankPosition - c.errorPosition; + } else { + return c.blankPosition; + } + } else if (key1 instanceof EvalError) { + if (key2 == null) { + return c.errorPosition - c.blankPosition; + } else if (key2 instanceof EvalError) { + return 0; + } else { + return c.errorPosition; + } + } else { + if (key2 == null) { + return -c.blankPosition; + } else if (key2 instanceof EvalError) { + return -c.errorPosition; + } else { + return _keyMakers[criterionIndex].compareKeys(key1, key2) * multiplier; + } + } + } + } + + public void initializeFromJSON(Project project, JSONObject obj) throws JSONException { + if (obj.has("criteria") && !obj.isNull("criteria")) { + JSONArray a = obj.getJSONArray("criteria"); + int count = a.length(); + + _criteria = new 
Criterion[count]; + _keyMakers = new KeyMaker[count]; + _comparatorWrappers = new ComparatorWrapper[count]; + + for (int i = 0; i < count; i++) { + JSONObject obj2 = a.getJSONObject(i); + + _criteria[i] = createCriterionFromJSON(project, obj2); + _keyMakers[i] = _criteria[i].createKeyMaker(); + _comparatorWrappers[i] = new ComparatorWrapper(i); + } + } else { + _criteria = new Criterion[0]; + _keyMakers = new KeyMaker[0]; + _comparatorWrappers = new ComparatorWrapper[0]; + } + } + + public boolean hasCriteria() { + return _criteria != null && _criteria.length > 0; + } + + protected Criterion createCriterionFromJSON(Project project, JSONObject obj) throws JSONException { + String valueType = "string"; + if (obj.has("valueType") && !obj.isNull("valueType")) { + valueType = obj.getString("valueType"); + } + + Criterion c = null; + if ("boolean".equals(valueType)) { + c = new BooleanCriterion(); + } else if ("date".equals(valueType)) { + c = new DateCriterion(); + } else if ("number".equals(valueType)) { + c = new NumberCriterion(); + } else { + c = new StringCriterion(); + } + + c.initializeFromJSON(project, obj); + return c; + } + + abstract protected Object makeKey( + Project project, KeyMaker keyMaker, Criterion c, Object o, int index); + + protected Object[] makeKeys(Project project, Object o, int index) { + Object[] keys = new Object[_keyMakers.length]; + for (int i = 0; i < keys.length; i++) { + keys[i] = makeKey(project, _keyMakers[i], _criteria[i], o, index); + } + return keys; + } + + protected int compare(Project project, Object o1, int i1, Object o2, int i2) { + int c = 0; + for (int i = 0; c == 0 && i < _comparatorWrappers.length; i++) { + c = _comparatorWrappers[i].compare(project, o1, i1, o2, i2); + } + return c; + } +} diff --git a/main/src/com/google/refine/sorting/BooleanCriterion.java b/main/src/com/google/refine/sorting/BooleanCriterion.java new file mode 100644 index 000000000..e9b42fa55 --- /dev/null +++ 
b/main/src/com/google/refine/sorting/BooleanCriterion.java @@ -0,0 +1,32 @@ +package com.google.refine.sorting; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.ExpressionUtils; + +public class BooleanCriterion extends Criterion { + final static protected EvalError s_error = new EvalError("Not a boolean"); + + @Override + public KeyMaker createKeyMaker() { + return new KeyMaker() { + @Override + protected Object makeKey(Object value) { + if (ExpressionUtils.isNonBlankData(value)) { + if (value instanceof Boolean) { + return value; + } else if (value instanceof String) { + return Boolean.parseBoolean((String) value); + } else { + return s_error; + } + } + return value; + } + + @Override + public int compareKeys(Object key1, Object key2) { + return ((Boolean) key1).compareTo((Boolean) key2); + } + }; + } +} diff --git a/main/src/com/google/refine/sorting/Criterion.java b/main/src/com/google/refine/sorting/Criterion.java new file mode 100644 index 000000000..ac17bed28 --- /dev/null +++ b/main/src/com/google/refine/sorting/Criterion.java @@ -0,0 +1,94 @@ +package com.google.refine.sorting; + +import org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.model.Row; + +abstract public class Criterion { + public String columnName; + protected int cellIndex; + + // These take on positive and negative values to indicate where blanks and errors + // go relative to non-blank values. They are also relative to each another. + // Blanks and errors are not affected by the reverse flag. 
+ public int blankPosition = 1; + public int errorPosition = 2; + + public boolean reverse; + + public void initializeFromJSON(Project project, JSONObject obj) throws JSONException { + if (obj.has("column") && !obj.isNull("column")) { + columnName = obj.getString("column"); + + Column column = project.columnModel.getColumnByName(columnName); + cellIndex = column != null ? column.getCellIndex() : -1; + } + + if (obj.has("blankPosition") && !obj.isNull("blankPosition")) { + blankPosition = obj.getInt("blankPosition"); + } + if (obj.has("errorPosition") && !obj.isNull("errorPosition")) { + errorPosition = obj.getInt("errorPosition"); + } + + if (obj.has("reverse") && !obj.isNull("reverse")) { + reverse = obj.getBoolean("reverse"); + } + } + + abstract public class KeyMaker { + public Object makeKey(Project project, Record record) { + Object error = null; + Object finalKey = null; + + for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { + Object key = makeKey(project, project.rows.get(r), r); + if (ExpressionUtils.isError(key)) { + error = key; + } else if (ExpressionUtils.isNonBlankData(key)) { + if (finalKey == null) { + finalKey = key; + } else { + int c = compareKeys(finalKey, key); + if (reverse) { + if (c < 0) { // key > finalKey + finalKey = key; + } + } else { + if (c > 0) { // key < finalKey + finalKey = key; + } + } + } + } + } + + if (finalKey != null) { + return finalKey; + } else if (error != null) { + return error; + } else { + return null; + } + } + + public Object makeKey(Project project, Row row, int rowIndex) { + if (cellIndex < 0) { + return null; + } else { + Object value = row.getCellValue(cellIndex); + return makeKey(value); + } + } + + abstract public int compareKeys(Object key1, Object key2); + + abstract protected Object makeKey(Object value); + } + abstract public KeyMaker createKeyMaker(); +} diff --git a/main/src/com/google/refine/sorting/DateCriterion.java b/main/src/com/google/refine/sorting/DateCriterion.java new file mode 
100644 index 000000000..0c5aab0ab --- /dev/null +++ b/main/src/com/google/refine/sorting/DateCriterion.java @@ -0,0 +1,35 @@ +package com.google.refine.sorting; + +import java.util.Calendar; +import java.util.Date; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.ExpressionUtils; + +public class DateCriterion extends Criterion { + final static protected EvalError s_error = new EvalError("Not a date"); + + @Override + public KeyMaker createKeyMaker() { + return new KeyMaker() { + @Override + protected Object makeKey(Object value) { + if (ExpressionUtils.isNonBlankData(value)) { + if (value instanceof Date) { + return value; + } else if (value instanceof Calendar) { + return ((Calendar) value).getTime(); + } else { + return s_error; + } + } + return value; + } + + @Override + public int compareKeys(Object key1, Object key2) { + return ((Date) key1).compareTo((Date) key2); + } + }; + } +} diff --git a/main/src/com/google/refine/sorting/NumberCriterion.java b/main/src/com/google/refine/sorting/NumberCriterion.java new file mode 100644 index 000000000..b70f855ca --- /dev/null +++ b/main/src/com/google/refine/sorting/NumberCriterion.java @@ -0,0 +1,50 @@ +package com.google.refine.sorting; + +import java.util.Calendar; +import java.util.Date; + +import com.google.refine.expr.EvalError; +import com.google.refine.expr.ExpressionUtils; + +public class NumberCriterion extends Criterion { + + final static protected EvalError s_error = new EvalError("Not a number"); + + @Override + public KeyMaker createKeyMaker() { + return new KeyMaker() { + @Override + protected Object makeKey(Object value) { + if (ExpressionUtils.isNonBlankData(value)) { + if (value instanceof Number) { + return value; + } else if (value instanceof Boolean) { + return ((Boolean) value).booleanValue() ? 
1 : 0; + } else if (value instanceof Date) { + return ((Date) value).getTime(); + } else if (value instanceof Calendar) { + return ((Calendar) value).getTime().getTime(); + } else if (value instanceof String) { + try { + double d = Double.parseDouble((String) value); + if (!Double.isNaN(d)) { + return d; + } + } catch (NumberFormatException e) { + // fall through + } + } + return s_error; + } + return value; + } + + @Override + public int compareKeys(Object key1, Object key2) { + double d1 = ((Number) key1).doubleValue(); + double d2 = ((Number) key2).doubleValue(); + return d1 < d2 ? -1 : (d1 > d2 ? 1 : 0); + } + }; + } +} diff --git a/main/src/com/google/refine/sorting/SortingRecordVisitor.java b/main/src/com/google/refine/sorting/SortingRecordVisitor.java new file mode 100644 index 000000000..745ea0e55 --- /dev/null +++ b/main/src/com/google/refine/sorting/SortingRecordVisitor.java @@ -0,0 +1,65 @@ +package com.google.refine.sorting; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import com.google.refine.browsing.RecordVisitor; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.sorting.Criterion.KeyMaker; + +public class SortingRecordVisitor extends BaseSorter implements RecordVisitor { + final protected RecordVisitor _visitor; + protected List _records; + + public SortingRecordVisitor(RecordVisitor visitor) { + _visitor = visitor; + } + + @Override + public void start(Project project) { + int count = project.recordModel.getRecordCount(); + _records = new ArrayList(count); + _keys = new ArrayList(count); + } + + @Override + public void end(Project project) { + _visitor.start(project); + + Collections.sort(_records, new Comparator() { + Project project; + + Comparator init(Project project) { + this.project = project; + return this; + } + + @Override + public int compare(Record o1, Record o2) { + return 
SortingRecordVisitor.this.compare(project, o1, o1.recordIndex, o2, o2.recordIndex); + } + }.init(project)); + + for (Record record : _records) { + _visitor.visit(project, record); + } + + _visitor.end(project); + } + + @Override + public boolean visit(Project project, Record record) { + _records.add(record); + return false; + } + + @Override + protected Object makeKey( + Project project, KeyMaker keyMaker, Criterion c, Object o, int index) { + + return keyMaker.makeKey(project, (Record) o); + } +} diff --git a/main/src/com/google/refine/sorting/SortingRowVisitor.java b/main/src/com/google/refine/sorting/SortingRowVisitor.java new file mode 100644 index 000000000..cb8296b62 --- /dev/null +++ b/main/src/com/google/refine/sorting/SortingRowVisitor.java @@ -0,0 +1,75 @@ +package com.google.refine.sorting; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import com.google.refine.browsing.RowVisitor; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.sorting.Criterion.KeyMaker; + +public class SortingRowVisitor extends BaseSorter implements RowVisitor { + final protected RowVisitor _visitor; + protected List _indexedRows; + + static protected class IndexedRow { + final int index; + final Row row; + + IndexedRow(int index, Row row) { + this.index = index; + this.row = row; + } + } + + public SortingRowVisitor(RowVisitor visitor) { + _visitor = visitor; + } + + @Override + public void start(Project project) { + int count = project.rows.size(); + _indexedRows = new ArrayList(count); + _keys = new ArrayList(count); + } + + @Override + public void end(Project project) { + _visitor.start(project); + + Collections.sort(_indexedRows, new Comparator() { + Project project; + + Comparator init(Project project) { + this.project = project; + return this; + } + + @Override + public int compare(IndexedRow o1, IndexedRow o2) { + return 
SortingRowVisitor.this.compare(project, o1.row, o1.index, o2.row, o2.index); + } + }.init(project)); + + for (IndexedRow indexedRow : _indexedRows) { + _visitor.visit(project, indexedRow.index, indexedRow.row); + } + + _visitor.end(project); + } + + @Override + public boolean visit(Project project, int rowIndex, Row row) { + _indexedRows.add(new IndexedRow(rowIndex, row)); + return false; + } + + @Override + protected Object makeKey( + Project project, KeyMaker keyMaker, Criterion c, Object o, int index) { + + return keyMaker.makeKey(project, (Row) o, index); + } +} diff --git a/main/src/com/google/refine/sorting/StringCriterion.java b/main/src/com/google/refine/sorting/StringCriterion.java new file mode 100644 index 000000000..1abcef669 --- /dev/null +++ b/main/src/com/google/refine/sorting/StringCriterion.java @@ -0,0 +1,40 @@ +package com.google.refine.sorting; + +import org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Project; + +public class StringCriterion extends Criterion { + public boolean caseSensitive; + + @Override + public void initializeFromJSON(Project project, JSONObject obj) throws JSONException { + super.initializeFromJSON(project, obj); + + if (obj.has("caseSensitive") && !obj.isNull("caseSensitive")) { + caseSensitive = obj.getBoolean("caseSensitive"); + } + } + + @Override + public KeyMaker createKeyMaker() { + return new KeyMaker() { + @Override + protected Object makeKey(Object value) { + return (ExpressionUtils.isNonBlankData(value) && !(value instanceof String)) ? 
+ value.toString() : value; + } + + @Override + public int compareKeys(Object key1, Object key2) { + if (StringCriterion.this.caseSensitive) { + return ((String) key1).compareTo((String) key2); + } else { + return ((String) key1).compareToIgnoreCase((String) key2); + } + } + }; + } +} diff --git a/main/src/com/google/refine/templating/DynamicFragment.java b/main/src/com/google/refine/templating/DynamicFragment.java new file mode 100644 index 000000000..7bf172275 --- /dev/null +++ b/main/src/com/google/refine/templating/DynamicFragment.java @@ -0,0 +1,11 @@ +package com.google.refine.templating; + +import com.google.refine.expr.Evaluable; + +class DynamicFragment extends Fragment { + final public Evaluable eval; + + public DynamicFragment(Evaluable eval) { + this.eval = eval; + } +} diff --git a/main/src/com/google/refine/templating/Fragment.java b/main/src/com/google/refine/templating/Fragment.java new file mode 100644 index 000000000..a8d84e7c6 --- /dev/null +++ b/main/src/com/google/refine/templating/Fragment.java @@ -0,0 +1,5 @@ +package com.google.refine.templating; + +public class Fragment { + +} diff --git a/main/src/com/google/refine/templating/Parser.java b/main/src/com/google/refine/templating/Parser.java new file mode 100644 index 000000000..829f93444 --- /dev/null +++ b/main/src/com/google/refine/templating/Parser.java @@ -0,0 +1,71 @@ +package com.google.refine.templating; + +import java.util.ArrayList; +import java.util.List; + +import com.google.refine.expr.MetaParser; +import com.google.refine.expr.ParsingException; +import com.google.refine.gel.ast.FieldAccessorExpr; +import com.google.refine.gel.ast.VariableExpr; + +public class Parser { + static public Template parse(String s) throws ParsingException { + List fragments = new ArrayList(); + + int start = 0, current = 0; + while (current < s.length() - 1) { + char c = s.charAt(current); + if (c == '\\') { + current += 2; + continue; + } + + char c2 = s.charAt(current + 1); + if (c == '$' && c2 == 
'{') { + int closeBrace = s.indexOf('}', current + 2); + if (closeBrace > current + 1) { + String columnName = s.substring(current + 2, closeBrace); + + if (current > start) { + fragments.add(new StaticFragment(s.substring(start, current))); + } + start = current = closeBrace + 1; + + fragments.add( + new DynamicFragment( + new FieldAccessorExpr( + new FieldAccessorExpr( + new VariableExpr("cells"), + columnName), + "value"))); + + continue; + } + } else if (c == '{' && c2 == '{') { + int closeBrace = s.indexOf('}', current + 2); + if (closeBrace > current + 1 && closeBrace < s.length() - 1 && s.charAt(closeBrace + 1) == '}') { + String expression = s.substring(current + 2, closeBrace); + + if (current > start) { + fragments.add(new StaticFragment(s.substring(start, current))); + } + start = current = closeBrace + 2; + + fragments.add( + new DynamicFragment( + MetaParser.parse(expression))); + + continue; + } + } + + current++; + } + + if (start < s.length()) { + fragments.add(new StaticFragment(s.substring(start))); + } + + return new Template(fragments); + } +} diff --git a/main/src/com/google/refine/templating/StaticFragment.java b/main/src/com/google/refine/templating/StaticFragment.java new file mode 100644 index 000000000..61342c217 --- /dev/null +++ b/main/src/com/google/refine/templating/StaticFragment.java @@ -0,0 +1,9 @@ +package com.google.refine.templating; + +class StaticFragment extends Fragment { + final public String text; + + public StaticFragment(String text) { + this.text = text; + } +} diff --git a/main/src/com/google/refine/templating/Template.java b/main/src/com/google/refine/templating/Template.java new file mode 100644 index 000000000..a79b9641a --- /dev/null +++ b/main/src/com/google/refine/templating/Template.java @@ -0,0 +1,173 @@ +package com.google.refine.templating; + +import java.io.IOException; +import java.io.Writer; +import java.util.Collection; +import java.util.List; +import java.util.Properties; + +import 
com.google.refine.browsing.RecordVisitor; +import com.google.refine.browsing.RowVisitor; +import com.google.refine.expr.ExpressionUtils; +import com.google.refine.model.Project; +import com.google.refine.model.Record; +import com.google.refine.model.Row; + +public class Template { + protected String _prefix; + protected String _suffix; + protected String _separator; + + protected List _fragments; + + public Template(List fragments) { + _fragments = fragments; + } + + public void setPrefix(String prefix) { + _prefix = prefix; + } + + public void setSuffix(String suffix) { + _suffix = suffix; + } + + public void setSeparator(String separator) { + _separator = separator; + } + + public RowVisitor getRowVisitor(Writer writer, int limit) { + return get(writer, limit); + } + + public RecordVisitor getRecordVisitor(Writer writer, int limit) { + return get(writer, limit); + } + + protected RowWritingVisitor get(Writer writer, int limit) { + return new RowWritingVisitor(writer, limit); + } + + protected class RowWritingVisitor implements RowVisitor, RecordVisitor { + final protected int limit; + final protected Writer writer; + protected Properties bindings; + + public int total; + + public RowWritingVisitor(Writer writer, int limit) { + this.limit = limit; + this.writer = writer; + } + + @Override + public void start(Project project) { + bindings = ExpressionUtils.createBindings(project); + + try { + if (_prefix != null) { + writer.write(_prefix); + } + } catch (IOException e) { + // ignore + } + } + + @Override + public void end(Project project) { + try { + if (_suffix != null) { + writer.write(_suffix); + } + } catch (IOException e) { + // ignore + } + } + + public boolean visit(Project project, int rowIndex, Row row) { + if (limit <= 0 || total < limit) { + internalVisit(project, rowIndex, row); + } + total++; + + return limit > 0 && total >= limit; + } + + @Override + public boolean visit(Project project, Record record) { + if (limit <= 0 || total < limit) { + 
internalVisit(project, record); + } + total++; + + return limit > 0 && total >= limit; + } + + protected void writeValue(Object v) throws IOException { + if (v == null) { + writer.write("null"); + } else if (ExpressionUtils.isError(v)) { + writer.write("null"); + //writer.write("[Error: " + ((EvalError) v).message); + } else if (v instanceof String) { + writer.write((String) v); + } else { + writer.write(v.toString()); + } + } + + public boolean internalVisit(Project project, int rowIndex, Row row) { + try { + if (total > 0 && _separator != null) { + writer.write(_separator); + } + + ExpressionUtils.bind(bindings, row, rowIndex, null, null); + for (Fragment f : _fragments) { + if (f instanceof StaticFragment) { + writer.write(((StaticFragment) f).text); + } else { + DynamicFragment df = (DynamicFragment) f; + Object value = df.eval.evaluate(bindings); + + if (value != null && ExpressionUtils.isArrayOrCollection(value)) { + if (ExpressionUtils.isArray(value)) { + Object[] a = (Object[]) value; + for (Object v : a) { + writeValue(v); + } + } else { + Collection a = ExpressionUtils.toObjectCollection(value); + for (Object v : a) { + writeValue(v); + } + } + continue; + } + + writeValue(value); + } + } + } catch (IOException e) { + // ignore + } + return false; + } + + protected boolean internalVisit(Project project, Record record) { + bindings.put("recordIndex", record.recordIndex); + + for (int r = record.fromRowIndex; r < record.toRowIndex; r++) { + Row row = project.rows.get(r); + + bindings.put("rowIndex", r); + + internalVisit(project, r, row); + + bindings.remove("recordIndex"); + } + return false; + } + } + +} diff --git a/main/src/com/google/refine/util/CookiesUtilities.java b/main/src/com/google/refine/util/CookiesUtilities.java new file mode 100644 index 000000000..62304057d --- /dev/null +++ b/main/src/com/google/refine/util/CookiesUtilities.java @@ -0,0 +1,48 @@ +package com.google.refine.util; + +import javax.servlet.http.Cookie; +import 
javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +public class CookiesUtilities { + + public static final String DOMAIN = "127.0.0.1"; + public static final String PATH = "/"; + + public static Cookie getCookie(HttpServletRequest request, String name) { + if (name == null) throw new RuntimeException("cookie name cannot be null"); + Cookie cookie = null; + Cookie[] cookies = request.getCookies(); + if (cookies != null) { + for (Cookie c : cookies) { + if (name.equals(c.getName())) { + cookie = c; + } + } + } + return cookie; + } + + public static void setCookie(HttpServletRequest request, HttpServletResponse response, String name, String value, int max_age) { + Cookie c = new Cookie(name, value); + c.setDomain(getDomain(request)); + c.setPath(PATH); + c.setMaxAge(max_age); + response.addCookie(c); + } + + public static void deleteCookie(HttpServletRequest request, HttpServletResponse response, String name) { + Cookie c = new Cookie(name, ""); + c.setDomain(getDomain(request)); + c.setPath(PATH); + c.setMaxAge(0); + response.addCookie(c); + } + + public static String getDomain(HttpServletRequest request) { + String host = request.getHeader("Host"); + if (host == null) return DOMAIN; + int index = host.indexOf(':'); + return (index > -1) ? 
host.substring(0,index) : host ; + } +} diff --git a/main/src/com/google/refine/util/FreebaseDataExtensionJob.java b/main/src/com/google/refine/util/FreebaseDataExtensionJob.java new file mode 100644 index 000000000..efedcd213 --- /dev/null +++ b/main/src/com/google/refine/util/FreebaseDataExtensionJob.java @@ -0,0 +1,415 @@ +/** + * + */ +package com.google.refine.util; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.Serializable; +import java.io.StringWriter; +import java.io.Writer; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +import com.google.refine.model.ReconCandidate; +import com.google.refine.protograph.FreebaseType; + +public class FreebaseDataExtensionJob { + static public class DataExtension { + final public Object[][] data; + + public DataExtension(Object[][] data) { + this.data = data; + } + } + + static public class ColumnInfo { + final public List names; + final public List path; + final public FreebaseType expectedType; + + protected ColumnInfo(List names, List path, FreebaseType expectedType) { + this.names = names; + this.path = path; + this.expectedType = expectedType; + } + } + + final public JSONObject extension; + final public int columnCount; + final public List columns = new ArrayList(); + + public FreebaseDataExtensionJob(JSONObject obj) throws JSONException { + this.extension = obj; + this.columnCount = (obj.has("properties") && !obj.isNull("properties")) ? 
+ countColumns(obj.getJSONArray("properties"), columns, new ArrayList(), new ArrayList()) : 0; + } + + public Map extend( + Set ids, + Map reconCandidateMap + ) throws Exception { + StringWriter writer = new StringWriter(); + formulateQuery(ids, extension, writer); + + String query = writer.toString(); + InputStream is = doMqlRead(query); + try { + String s = ParsingUtilities.inputStreamToString(is); + JSONObject o = ParsingUtilities.evaluateJsonStringToObject(s); + + Map map = new HashMap(); + if (o.has("result")) { + JSONArray a = o.getJSONArray("result"); + int l = a.length(); + + for (int i = 0; i < l; i++) { + JSONObject o2 = a.getJSONObject(i); + String id = o2.getString("id"); + FreebaseDataExtensionJob.DataExtension ext = collectResult(o2, reconCandidateMap); + + if (ext != null) { + map.put(id, ext); + } + } + } + + return map; + } finally { + is.close(); + } + } + + protected FreebaseDataExtensionJob.DataExtension collectResult( + JSONObject obj, + Map reconCandidateMap + ) throws JSONException { + List rows = new ArrayList(); + + collectResult(rows, extension.getJSONArray("properties"), obj, 0, 0, reconCandidateMap); + + Object[][] data = new Object[rows.size()][columnCount]; + rows.toArray(data); + + return new DataExtension(data); + } + + protected void storeCell( + List rows, + int row, + int col, + Object value, + Map reconCandidateMap + ) { + while (row >= rows.size()) { + rows.add(new Object[columnCount]); + } + rows.get(row)[col] = value; + } + + protected void storeCell( + List rows, + int row, + int col, + JSONObject obj, + Map reconCandidateMap + ) throws JSONException { + String id = obj.getString("id"); + ReconCandidate rc; + if (reconCandidateMap.containsKey(id)) { + rc = reconCandidateMap.get(id); + } else { + rc = new ReconCandidate( + obj.getString("id"), + obj.getString("name"), + JSONUtilities.getStringArray(obj, "type"), + 100 + ); + + reconCandidateMap.put(id, rc); + } + + storeCell(rows, row, col, rc, reconCandidateMap); + } + + 
protected int[] collectResult( + List rows, + JSONObject extNode, + JSONObject resultNode, + int startRowIndex, + int startColumnIndex, + Map reconCandidateMap + ) throws JSONException { + String propertyID = extNode.getString("id"); + String expectedTypeID = extNode.getJSONObject("expected").getString("id"); + + JSONArray a = resultNode != null && resultNode.has(propertyID) && !resultNode.isNull(propertyID) ? + resultNode.getJSONArray(propertyID) : null; + + if (expectedTypeID.startsWith("/type/")) { + if (a != null) { + int l = a.length(); + for (int r = 0; r < l; r++) { + Object o = a.isNull(r) ? null : a.get(r); + if (o instanceof Serializable) { + storeCell(rows, startRowIndex++, startColumnIndex, o, reconCandidateMap); + } + } + } + + // note that we still take up a column even if we don't have any data + return new int[] { startRowIndex, startColumnIndex + 1 }; + } else { + boolean hasSubProperties = (extNode.has("properties") && !extNode.isNull("properties")); + boolean isOwnColumn = !hasSubProperties || (extNode.has("included") && extNode.getBoolean("included")); + + if (a != null && a.length() > 0) { + int maxColIndex = startColumnIndex; + + int l = a.length(); + for (int r = 0; r < l; r++) { + Object v = a.isNull(r) ? null : a.get(r); + JSONObject o = v != null && v instanceof JSONObject ? 
(JSONObject) v : null; + + int startColumnIndex2 = startColumnIndex; + int startRowIndex2 = startRowIndex; + + if (isOwnColumn) { + if (o != null) { + storeCell(rows, startRowIndex2++, startColumnIndex2++, o, reconCandidateMap); + } else { + storeCell(rows, startRowIndex2++, startColumnIndex2++, v, reconCandidateMap); + } + } + + if (hasSubProperties && o != null) { + int[] rowcol = collectResult( + rows, + extNode.getJSONArray("properties"), + o, + startRowIndex, + startColumnIndex2, + reconCandidateMap + ); + + startRowIndex2 = rowcol[0]; + startColumnIndex2 = rowcol[1]; + } + + startRowIndex = startRowIndex2; + maxColIndex = Math.max(maxColIndex, startColumnIndex2); + } + + return new int[] { startRowIndex, maxColIndex }; + } else { + return new int[] { + startRowIndex, + startColumnIndex + countColumns(extNode, null, new ArrayList(), new ArrayList()) + }; + } + } + } + + protected int[] collectResult( + List rows, + JSONArray subProperties, + JSONObject resultNode, + int startRowIndex, + int startColumnIndex, + Map reconCandidateMap + ) throws JSONException { + int maxStartRowIndex = startRowIndex; + + int k = subProperties.length(); + for (int c = 0; c < k; c++) { + int[] rowcol = collectResult( + rows, + subProperties.getJSONObject(c), + resultNode, + startRowIndex, + startColumnIndex, + reconCandidateMap + ); + + maxStartRowIndex = Math.max(maxStartRowIndex, rowcol[0]); + startColumnIndex = rowcol[1]; + } + + return new int[] { maxStartRowIndex, startColumnIndex }; + } + + + static protected InputStream doMqlRead(String query) throws IOException { + URL url = new URL("http://api.freebase.com/api/service/mqlread"); + + URLConnection connection = url.openConnection(); + connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + connection.setConnectTimeout(5000); + connection.setDoOutput(true); + + DataOutputStream dos = new DataOutputStream(connection.getOutputStream()); + try { + String body = "extended=1&query=" + 
ParsingUtilities.encode(query); + + dos.writeBytes(body); + } finally { + dos.flush(); + dos.close(); + } + + connection.connect(); + + return connection.getInputStream(); + } + + static protected void formulateQuery(Set ids, JSONObject node, Writer writer) throws JSONException { + JSONWriter jsonWriter = new JSONWriter(writer); + + jsonWriter.object(); + jsonWriter.key("query"); + jsonWriter.array(); + jsonWriter.object(); + + jsonWriter.key("id"); jsonWriter.value(null); + jsonWriter.key("id|="); + jsonWriter.array(); + for (String id : ids) { + if (id != null) { + jsonWriter.value(id); + } + } + jsonWriter.endArray(); + + formulateQueryNode(node.getJSONArray("properties"), jsonWriter); + + jsonWriter.endObject(); + jsonWriter.endArray(); + jsonWriter.endObject(); + } + + static protected void formulateQueryNode(JSONObject node, JSONWriter writer) throws JSONException { + String propertyID = node.getString("id"); + String expectedTypeID = node.getJSONObject("expected").getString("id"); + + writer.key(propertyID); + writer.array(); + { + if (!expectedTypeID.startsWith("/type/")) { // not literal + writer.object(); + writer.key("optional"); writer.value(true); + + boolean hasLimit = false; + if (node.has("constraints") && !node.isNull("constraints")) { + JSONObject constraints = node.getJSONObject("constraints"); + + String[] names = JSONObject.getNames(constraints); + for (String name : names) { + Object value = constraints.get(name); + if (name.equals("limit")) { + hasLimit = true; + } + + if (!name.contains(":") && + !name.equals("limit") && + !name.equals("optional") && + !name.equals("count") && + !name.equals("estimate-count") && + !name.equals("sort") && + !name.equals("return")) { + + if (name.startsWith("!")) { + name = "!c:" + name.substring(1); + } else { + name = "c:" + name; + } + } + writer.key(name); + writer.value(value); + } + } + if (!hasLimit) { + writer.key("limit"); writer.value(10); + } + + { + boolean hasSubProperties = 
(node.has("properties") && !node.isNull("properties")); + + if (!hasSubProperties || (node.has("included") && node.getBoolean("included"))) { + writer.key("name"); writer.value(null); + writer.key("id"); writer.value(null); + writer.key("type"); writer.array(); writer.endArray(); + } + + if (hasSubProperties) { + formulateQueryNode(node.getJSONArray("properties"), writer); + } + } + writer.endObject(); + } + } + writer.endArray(); + } + + static protected void formulateQueryNode(JSONArray propertiesA, JSONWriter writer) throws JSONException { + int l = propertiesA.length(); + + for (int i = 0; i < l; i++) { + formulateQueryNode(propertiesA.getJSONObject(i), writer); + } + } + + static protected int countColumns(JSONObject obj, List columns, List names, List path) throws JSONException { + String name = obj.getString("name"); + + List names2 = null; + List path2 = null; + if (columns != null) { + names2 = new ArrayList(names); + names2.add(name); + + path2 = new ArrayList(path); + path2.add(obj.getString("id")); + } + + if (obj.has("properties") && !obj.isNull("properties")) { + boolean included = (obj.has("included") && obj.getBoolean("included")); + if (included && columns != null) { + JSONObject expected = obj.getJSONObject("expected"); + + columns.add(new ColumnInfo(names2, path2, + new FreebaseType(expected.getString("id"), expected.getString("name")))); + } + + return (included ? 
1 : 0) + + countColumns(obj.getJSONArray("properties"), columns, names2, path2); + } else { + if (columns != null) { + JSONObject expected = obj.getJSONObject("expected"); + + columns.add(new ColumnInfo(names2, path2, + new FreebaseType(expected.getString("id"), expected.getString("name")))); + } + return 1; + } + } + + static protected int countColumns(JSONArray a, List columns, List names, List path) throws JSONException { + int c = 0; + int l = a.length(); + for (int i = 0; i < l; i++) { + c += countColumns(a.getJSONObject(i), columns, names, path); + } + return c; + } +} \ No newline at end of file diff --git a/main/src/com/google/refine/util/FreebaseUtils.java b/main/src/com/google/refine/util/FreebaseUtils.java new file mode 100644 index 000000000..3d8d46430 --- /dev/null +++ b/main/src/com/google/refine/util/FreebaseUtils.java @@ -0,0 +1,251 @@ +package com.google.refine.util; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import javax.servlet.http.HttpServletRequest; + +import oauth.signpost.OAuthConsumer; +import oauth.signpost.exception.OAuthCommunicationException; +import oauth.signpost.exception.OAuthExpectationFailedException; +import oauth.signpost.exception.OAuthMessageSignerException; + +import org.apache.http.Header; +import org.apache.http.HttpResponse; +import org.apache.http.NameValuePair; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.HttpClient; +import org.apache.http.client.entity.UrlEncodedFormEntity; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.message.BasicNameValuePair; +import org.apache.http.params.CoreProtocolPNames; +import org.apache.http.util.EntityUtils; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.GridworksServlet; +import 
com.google.refine.ProjectManager; +import com.google.refine.oauth.Credentials; +import com.google.refine.oauth.OAuthUtilities; +import com.google.refine.oauth.Provider; + +public class FreebaseUtils { + + static final public String FREEBASE_HOST = "www.freebase.com"; + static final public String FREEBASE_SANDBOX_HOST = "www.sandbox-freebase.com"; + + static final private String FREEQ_URL = "http://data.labs.freebase.com/freeq/gridworks"; + + static final private String GRIDWORKS_ID = "/en/gridworks"; + + private static String getUserInfoURL(String host) { + return "http://" + host + "/api/service/user_info"; + } + + private static String getMQLWriteURL(String host) { + return "http://" + host + "/api/service/mqlwrite"; + } + + private static String getMQLReadURL(String host) { + return "http://" + host + "/api/service/mqlread"; + } + + public static String getUserInfo(Credentials credentials, Provider provider) + throws OAuthMessageSignerException, OAuthExpectationFailedException, OAuthCommunicationException, ClientProtocolException, IOException { + + OAuthConsumer consumer = OAuthUtilities.getConsumer(credentials, provider); + + HttpGet httpRequest = new HttpGet(getUserInfoURL(provider.getHost())); + httpRequest.getParams().setParameter(CoreProtocolPNames.USER_AGENT, "Gridworks " + GridworksServlet.getVersion()); + + // this is required by the Metaweb API to avoid XSS + httpRequest.setHeader("X-Requested-With", "1"); + + // sign the request with the oauth library + consumer.sign(httpRequest); + + // execute the request + HttpClient httpClient = new DefaultHttpClient(); + HttpResponse httpResponse = httpClient.execute(httpRequest); + + // return the results + return EntityUtils.toString(httpResponse.getEntity()); + } + + public static String getUserBadges(Provider provider, String user_id) + throws ClientProtocolException, IOException, JSONException { + + String query = "{" + + "'id' : '" + user_id + "'," + + "'!/type/usergroup/member' : [{" + + "'id' : null," + + 
"'key' : [{" + + "'namespace' : null" + + "}]" + + "}]" + + "}".replace("'", "\""); + + return mqlread(provider, query); + } + + public static String mqlread(Provider provider, String query) + throws ClientProtocolException, IOException, JSONException { + + JSONObject envelope = new JSONObject(); + envelope.put("query", new JSONObject(query)); + + List formparams = new ArrayList(); + formparams.add(new BasicNameValuePair("query", envelope.toString())); + UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8"); + + HttpPost httpRequest = new HttpPost(getMQLReadURL(provider.getHost())); + httpRequest.getParams().setParameter(CoreProtocolPNames.USER_AGENT, "Gridworks " + GridworksServlet.getVersion()); + httpRequest.setEntity(entity); + + // this is required by the Metaweb API to avoid XSS + httpRequest.setHeader("X-Requested-With", "1"); + + // execute the request + HttpClient httpClient = new DefaultHttpClient(); + HttpResponse httpResponse = httpClient.execute(httpRequest); + + // return the results + return EntityUtils.toString(httpResponse.getEntity()); + } + + public static String mqlwrite(Credentials credentials, Provider provider, String query) + throws OAuthMessageSignerException, OAuthExpectationFailedException, OAuthCommunicationException, ClientProtocolException, IOException, JSONException { + OAuthConsumer consumer = OAuthUtilities.getConsumer(credentials, provider); + + JSONObject envelope = new JSONObject(); + envelope.put("query", new JSONObject(query)); + + List formparams = new ArrayList(); + formparams.add(new BasicNameValuePair("query", envelope.toString())); + UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8"); + + HttpPost httpRequest = new HttpPost(getMQLWriteURL(provider.getHost())); + httpRequest.getParams().setParameter(CoreProtocolPNames.USER_AGENT, "Gridworks " + GridworksServlet.getVersion()); + httpRequest.setEntity(entity); + + // this is required by the Metaweb API to avoid XSS + 
httpRequest.setHeader("X-Requested-With", "1"); + + // sign the request with the oauth library + consumer.sign(httpRequest); + + // execute the request + HttpClient httpClient = new DefaultHttpClient(); + HttpResponse httpResponse = httpClient.execute(httpRequest); + + // return the results + return EntityUtils.toString(httpResponse.getEntity()); + } + + public static String uploadTriples( + HttpServletRequest request, + String graph, + String source_name, + String source_id, + String mdo_id, + String triples + ) throws OAuthMessageSignerException, OAuthExpectationFailedException, OAuthCommunicationException, ClientProtocolException, JSONException, IOException { + + Provider provider = OAuthUtilities.getProvider(FREEBASE_HOST); + + Credentials credentials = Credentials.getCredentials(request, provider, Credentials.Type.ACCESS); + + JSONObject mdo_info = new JSONObject(); + mdo_info.put("name", source_name); + if (source_id != null) { + mdo_info.put("info_source",source_id); + } + + JSONObject user_info = new JSONObject(getUserInfo(credentials, provider)); + if (user_info.has("username")) { + + String user_id = user_info.getString("id"); + boolean allowed = isAllowedToWrite(provider, graph, user_id); + + if (allowed) { + List formparams = new ArrayList(); + formparams.add(new BasicNameValuePair("user", user_info.getString("id"))); + formparams.add(new BasicNameValuePair("action_type", "LOAD_TRIPLE")); + formparams.add(new BasicNameValuePair("operator", user_info.getString("id"))); + formparams.add(new BasicNameValuePair("software_tool_used", GRIDWORKS_ID)); + formparams.add(new BasicNameValuePair("rabj", "true")); + formparams.add(new BasicNameValuePair("mdo_info", mdo_info.toString())); + formparams.add(new BasicNameValuePair("graphport", graph)); + formparams.add(new BasicNameValuePair("payload", triples)); + formparams.add(new BasicNameValuePair("check_params", "false")); + if (mdo_id != null) { + formparams.add(new BasicNameValuePair("mdo_guid", mdo_id)); + } + 
UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8"); + + HttpPost httpRequest = new HttpPost(getFreeQUrl()); + httpRequest.getParams().setParameter(CoreProtocolPNames.USER_AGENT, "Gridworks " + GridworksServlet.getVersion()); + httpRequest.setEntity(entity); + + HttpPost surrogateRequest = new HttpPost(getUserInfoURL(FREEBASE_HOST)); + surrogateRequest.setEntity(entity); + + OAuthConsumer consumer = OAuthUtilities.getConsumer(credentials, provider); + + consumer.sign(surrogateRequest); + + Header[] h = surrogateRequest.getHeaders("Authorization"); + if (h.length > 0) { + httpRequest.setHeader("X-Freebase-Credentials", h[0].getValue()); + } else { + throw new RuntimeException("Couldn't find the oauth signature header in the surrogate request"); + } + + // execute the request + HttpClient httpClient = new DefaultHttpClient(); + HttpResponse httpResponse = httpClient.execute(httpRequest); + + // return the results + return EntityUtils.toString(httpResponse.getEntity()); + } else { + throw new RuntimeException("User '" + user_id + "' is not allowed to write to '" + graph + "' with Gridworks"); + } + } else { + throw new RuntimeException("Invalid credentials"); + } + } + + private static boolean isAllowedToWrite(Provider provider, String graph, String user_id) throws JSONException, ClientProtocolException, IOException { + if ("sandbox".equals(graph)) return true; + + JSONObject user_badges = new JSONObject(getUserBadges(provider, user_id)); + JSONObject result = user_badges.getJSONObject("result"); + + if (result == null) { + throw new RuntimeException("Error evaluating badges for user '" + user_id + "'"); + } + + boolean allowed = false; + + JSONArray badges = result.getJSONArray("!/type/usergroup/member"); + for (int i = 0; i < badges.length(); i++) { + JSONObject o = badges.getJSONObject(i); + String id = o.getString("id"); + if ("/en/metaweb_staff".equals(id)) { + allowed = true; + break; + } + } + + return allowed; + } + + static public 
String getFreeQUrl() { + String url = (String) ProjectManager.singleton.getPreferenceStore().get("freebase.freeq"); + return url != null ? url : FREEQ_URL; + } +} diff --git a/main/src/com/google/refine/util/IOUtils.java b/main/src/com/google/refine/util/IOUtils.java new file mode 100644 index 000000000..95c4f9b55 --- /dev/null +++ b/main/src/com/google/refine/util/IOUtils.java @@ -0,0 +1,45 @@ +package com.google.refine.util; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +public class IOUtils { + + private static final int DEFAULT_BUFFER_SIZE = 4 * 1024; + + public static long copy(InputStream input, OutputStream output) throws IOException { + byte[] buffer = new byte[DEFAULT_BUFFER_SIZE]; + long count = 0; + int n = 0; + while (-1 != (n = input.read(buffer))) { + output.write(buffer, 0, n); + count += n; + } + return count; + } + + public static long copy(InputStream input, File file) throws IOException { + FileOutputStream output = new FileOutputStream(file); + byte[] buffer = new byte[DEFAULT_BUFFER_SIZE]; + long count = 0; + int n = 0; + try { + while (-1 != (n = input.read(buffer))) { + output.write(buffer, 0, n); + count += n; + } + } finally { + try { + output.close(); + } catch (IOException e) {} + try { + input.close(); + } catch (IOException e) {} + } + return count; + } + +} diff --git a/main/src/com/google/refine/util/IndentWriter.java b/main/src/com/google/refine/util/IndentWriter.java new file mode 100644 index 000000000..072244338 --- /dev/null +++ b/main/src/com/google/refine/util/IndentWriter.java @@ -0,0 +1,77 @@ +/* + * Created on Dec 1, 2005 + * Created by dfhuynh + */ +package com.google.refine.util; + +import java.io.IOException; +import java.io.Writer; + +/** + * A utility for writing indented code. 
+ * + * @author dfhuynh + */ +public class IndentWriter { + final static private int s_max = 20; + + static private String[] s_indents = new String[s_max]; + static { + for (int i = 0; i < s_max; i++) { + StringBuffer sb = new StringBuffer(s_max); + for (int j = 0; j < i; j++) { + sb.append('\t'); + } + s_indents[i] = sb.toString(); + } + } + + private Writer m_writer; + private int m_count = 0; + private boolean m_indent = true; + + public IndentWriter(Writer writer) { + m_writer = writer; + } + + public void close() throws IOException { + m_writer.close(); + } + + public void flush() throws IOException { + m_writer.flush(); + } + + public void print(Object o) throws IOException { + printIndent(); + m_writer.write(o.toString()); + m_indent = false; + } + + public void println() throws IOException { + printIndent(); + m_writer.write("\n"); + m_indent = true; + } + + public void println(Object o) throws IOException { + printIndent(); + m_writer.write(o.toString()); + m_writer.write("\n"); + m_indent = true; + } + + public void indent() { + m_count++; + } + + public void unindent() { + m_count--; + } + + private void printIndent() throws IOException { + if (m_indent) { + m_writer.write(s_indents[m_count]); + } + } +} diff --git a/main/src/com/google/refine/util/JSONUtilities.java b/main/src/com/google/refine/util/JSONUtilities.java new file mode 100644 index 000000000..8c9e1717f --- /dev/null +++ b/main/src/com/google/refine/util/JSONUtilities.java @@ -0,0 +1,155 @@ +package com.google.refine.util; + +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.List; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONWriter; + +public class JSONUtilities { + static public String getString(JSONObject obj, String key, String def) { + try { + return obj.getString(key); + } catch (JSONException e) { + return def; + } + } + + static public int getInt(JSONObject obj, String 
key, int def) { + try { + return obj.getInt(key); + } catch (JSONException e) { + return def; + } + } + + static public boolean getBoolean(JSONObject obj, String key, boolean def) { + try { + return obj.getBoolean(key); + } catch (JSONException e) { + return def; + } + } + + static public double getDouble(JSONObject obj, String key, double def) { + try { + return obj.getDouble(key); + } catch (JSONException e) { + return def; + } + } + + static public long getLong(JSONObject obj, String key, long def) { + try { + return obj.getLong(key); + } catch (JSONException e) { + return def; + } + } + + static public Date getDate(JSONObject obj, String key, Date def) { + try { + Date d = ParsingUtilities.stringToDate(obj.getString(key)); + + return d != null ? d : def; + } catch (JSONException e) { + return def; + } + } + + static public int[] getIntArray(JSONObject obj, String key) { + try { + JSONArray a = obj.getJSONArray(key); + int[] r = new int[a.length()]; + + for (int i = 0; i < r.length; i++) { + r[i] = a.getInt(i); + } + + return r; + } catch (JSONException e) { + return new int[0]; + } + } + + static public String[] getStringArray(JSONObject obj, String key) { + try { + JSONArray a = obj.getJSONArray(key); + String[] r = new String[a.length()]; + + for (int i = 0; i < r.length; i++) { + r[i] = a.getString(i); + } + + return r; + } catch (JSONException e) { + return new String[0]; + } + } + + static public void getStringList(JSONObject obj, String key, List list) { + try { + JSONArray a = obj.getJSONArray(key); + int count = a.length(); + + for (int i = 0; i < count; i++) { + list.add(a.getString(i)); + } + } catch (JSONException e) { + } + } + + static public void writeStringList(JSONWriter writer, List list) throws JSONException { + writer.array(); + for (String s : list) { + writer.value(s); + } + writer.endArray(); + } + + static public void putField(JSONObject obj, String key, Object value) throws JSONException { + if (value instanceof Integer) { + obj.put(key, 
((Integer) value).intValue()); + } else if (value instanceof Long) { + obj.put(key, ((Long) value).intValue()); + } else if (value instanceof Number) { + obj.put(key, ((Double) value).doubleValue()); + } else if (value instanceof Boolean) { + obj.put(key, (Boolean) value); + } else if (value instanceof Date) { + obj.put(key, ParsingUtilities.dateToString((Date) value)); + } else if (value instanceof Calendar) { + obj.put(key, ParsingUtilities.dateToString(((Calendar) value).getTime())); + } else if (value instanceof String) { + obj.put(key, (String) value); + } else { + obj.put(key, value.toString()); + } + } + + static public Object[] toArray(JSONArray a) throws JSONException { + int l = a.length(); + + Object[] a2 = new Object[l]; + for (int i = 0; i < l; i++) { + a2[i] = a.get(i); + } + + return a2; + } + + static public List toStringList(JSONArray a) throws JSONException { + int l = a.length(); + + List list = new ArrayList(); + for (int i = 0; i < l; i++) { + list.add(a.getString(i)); + } + + return list; + } +} diff --git a/main/src/com/google/refine/util/JSObject.java b/main/src/com/google/refine/util/JSObject.java new file mode 100644 index 000000000..33dc1a7b0 --- /dev/null +++ b/main/src/com/google/refine/util/JSObject.java @@ -0,0 +1,130 @@ +package com.google.refine.util; + +import java.io.IOException; +import java.util.Collection; +import java.util.Enumeration; +import java.util.Iterator; +import java.util.Properties; + +import org.apache.commons.lang.StringEscapeUtils; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +/** + * A utility class for encapsulating a Javascript object that can + * then be pretty-printed out through an IndentWriter. 
+ * + * @author dfhuynh + */ +public class JSObject extends Properties { + private static final long serialVersionUID = 5864375136126385719L; + + static public void writeJSObject(IndentWriter writer, JSObject jso) throws IOException, JSONException { + writer.println("{"); + writer.indent(); + { + Enumeration e = jso.propertyNames(); + while (e.hasMoreElements()) { + String name = (String) e.nextElement(); + Object value = jso.get(name); + + writer.print("'"); + writer.print(name + "' : "); + writeObject(writer, value); + + if (e.hasMoreElements()) { + writer.println(","); + } else { + writer.println(); + } + } + } + writer.unindent(); + writer.print("}"); + } + + static public void writeCollection(IndentWriter writer, Collection c) throws IOException, JSONException { + writer.println("["); + writer.indent(); + { + Iterator i = c.iterator(); + while (i.hasNext()) { + writeObject(writer, i.next()); + if (i.hasNext()) { + writer.println(","); + } else { + writer.println(); + } + } + } + writer.unindent(); + writer.print("]"); + } + + static public void writeJSONObject(IndentWriter writer, JSONObject no) throws IOException, JSONException { + writer.println("{"); + writer.indent(); + { + String[] names = JSONObject.getNames(no); + for (int i = 0; i < names.length; i++) { + String name = names[i]; + Object value = no.get(name); + + writer.print("'"); + writer.print(name + "' : "); + writeObject(writer, value); + + if (i < names.length - 1) { + writer.println(","); + } else { + writer.println(); + } + } + } + writer.unindent(); + writer.print("}"); + } + + static public void writeJSONArray(IndentWriter writer, JSONArray na) throws IOException, JSONException { + writer.println("["); + writer.indent(); + { + int count = na.length(); + for (int i = 0; i < count; i++) { + Object element = na.get(i); + + writeObject(writer, element); + if (i < count - 1) { + writer.println(","); + } else { + writer.println(); + } + } + } + writer.unindent(); + writer.print("]"); + } + + static 
public void writeObject(IndentWriter writer, Object o) throws IOException, JSONException { + if (o == null) { + writer.print("null"); + } else if (o instanceof Boolean) { + writer.print(((Boolean) o).booleanValue() ? "true" : "false"); + } else if (o instanceof Number) { + writer.print(((Number) o).toString()); + + } else if (o instanceof Collection) { + writeCollection(writer, (Collection) o); + } else if (o instanceof JSONArray) { + writeJSONArray(writer, (JSONArray) o); + } else if (o instanceof JSObject) { + writeJSObject(writer, (JSObject) o); + } else if (o instanceof JSONObject) { + writeJSONObject(writer, (JSONObject) o); + + } else { + writer.print("\"" + StringEscapeUtils.escapeJavaScript(o.toString()) + "\""); + } + } +} diff --git a/main/src/com/google/refine/util/ParsingUtilities.java b/main/src/com/google/refine/util/ParsingUtilities.java new file mode 100644 index 000000000..cb0c43c79 --- /dev/null +++ b/main/src/com/google/refine/util/ParsingUtilities.java @@ -0,0 +1,128 @@ +package com.google.refine.util; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Properties; + +import javax.servlet.http.HttpServletRequest; + +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.net.URLCodec; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONTokener; + +public class ParsingUtilities { + + static final public SimpleDateFormat s_sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); + + static public Properties parseUrlParameters(HttpServletRequest request) { + Properties options = new Properties(); + + String query = request.getQueryString(); + if (query != null) { + if (query.startsWith("?")) { + query = query.substring(1); + } + 
parseParameters(options,query); + } + return options; + } + + static public Properties parseParameters(Properties p, String str) { + if (str != null) { + String[] pairs = str.split("&"); + for (String pairString : pairs) { + int equal = pairString.indexOf('='); + String name = (equal >= 0) ? pairString.substring(0, equal) : ""; + String value = (equal >= 0) ? ParsingUtilities.decode(pairString.substring(equal + 1)) : ""; + p.put(name, value); + } + } + return p; + } + + static public Properties parseParameters(String str) { + return (str == null) ? null : parseParameters(new Properties(),str); + } + + static public String inputStreamToString(InputStream is) throws IOException { + Reader reader = new InputStreamReader(is, "UTF-8"); + try { + return readerToString(reader); + } finally { + reader.close(); + } + } + + static public String readerToString(Reader reader) throws IOException { + StringBuffer sb = new StringBuffer(); + + char[] chars = new char[8192]; + int c; + + while ((c = reader.read(chars)) > 0) { + sb.insert(sb.length(), chars, 0, c); + } + + return sb.toString(); + } + + static public JSONObject evaluateJsonStringToObject(String s) throws JSONException { + if( s == null ) throw new IllegalArgumentException("parameter 's' should not be null"); + JSONTokener t = new JSONTokener(s); + Object o = t.nextValue(); + if (o instanceof JSONObject) { + return (JSONObject) o; + } else { + throw new JSONException(s + " couldn't be parsed as JSON object"); + } + } + + static public JSONArray evaluateJsonStringToArray(String s) throws JSONException { + JSONTokener t = new JSONTokener(s); + Object o = t.nextValue(); + if (o instanceof JSONArray) { + return (JSONArray) o; + } else { + throw new JSONException(s + " couldn't be parsed as JSON array"); + } + } + + private static final URLCodec codec = new URLCodec(); + static public String encode(String s) { + try { + return codec.encode(s, "UTF-8"); + } catch (UnsupportedEncodingException e) { + return s; // should not 
happen + } + } + static public String decode(String s) { + try { + return codec.decode(s, "UTF-8"); + } catch (UnsupportedEncodingException e) { + return s; // should not happen + } catch (DecoderException e) { + return s; // should not happen + } + } + + static public String dateToString(Date d) { + return s_sdf.format(d); + } + + static public Date stringToDate(String s) { + try { + return s_sdf.parse(s); + } catch (ParseException e) { + return null; + } + } +} diff --git a/main/src/com/google/refine/util/Pool.java b/main/src/com/google/refine/util/Pool.java new file mode 100644 index 000000000..a6b30d11d --- /dev/null +++ b/main/src/com/google/refine/util/Pool.java @@ -0,0 +1,162 @@ +package com.google.refine.util; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Reader; +import java.io.Writer; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.Map.Entry; + +import org.json.JSONException; +import org.json.JSONWriter; + +import com.google.refine.GridworksServlet; +import com.google.refine.Jsonizable; +import com.google.refine.model.Recon; +import com.google.refine.model.ReconCandidate; + +public class Pool implements Jsonizable { + final protected Map candidates = new HashMap(); + final protected Map recons = new HashMap(); + + public void pool(ReconCandidate candidate) { + candidates.put(candidate.id, candidate); + } + + public void pool(Recon recon) { + recons.put(Long.toString(recon.id), recon); + poolReconCandidates(recon); + } + + public void poolReconCandidates(Recon recon) { + if (recon.match != null) { + pool(recon.match); + } + if (recon.candidates != null) { + for (ReconCandidate candidate : recon.candidates) { + pool(candidate); + } + } + } + + public Recon getRecon(String id) { + return recons.get(id); + } + + public ReconCandidate getReconCandidate(String topicID) { + return 
candidates.get(topicID); + } + + public void save(OutputStream out) throws IOException { + Writer writer = new OutputStreamWriter(out); + try { + save(writer); + } finally { + writer.flush(); + } + } + + public void save(Writer writer) throws IOException { + writer.write(GridworksServlet.getVersion()); writer.write('\n'); + + Properties options = new Properties(); + options.setProperty("mode", "save"); + options.put("pool", this); + + Collection candidates2 = candidates.values(); + writer.write("reconCandidateCount=" + candidates2.size()); writer.write('\n'); + + for (ReconCandidate c : candidates2) { + JSONWriter jsonWriter = new JSONWriter(writer); + try { + c.write(jsonWriter, options); + + writer.write('\n'); + } catch (JSONException e) { + e.printStackTrace(); + } + } + + Collection recons2 = recons.values(); + writer.write("reconCount=" + recons2.size()); writer.write('\n'); + + for (Recon recon : recons2) { + JSONWriter jsonWriter = new JSONWriter(writer); + try { + recon.write(jsonWriter, options); + + writer.write('\n'); + } catch (JSONException e) { + e.printStackTrace(); + } + } + } + + public void load(Reader reader) throws Exception { + LineNumberReader reader2 = new LineNumberReader(reader); + + /* String version = */ reader2.readLine(); + + String line; + while ((line = reader2.readLine()) != null) { + int equal = line.indexOf('='); + CharSequence field = line.subSequence(0, equal); + String value = line.substring(equal + 1); + + if ("reconCandidateCount".equals(field)) { + int count = Integer.parseInt(value); + + for (int i = 0; i < count; i++) { + line = reader2.readLine(); + if (line != null) { + ReconCandidate candidate = ReconCandidate.loadStreaming(line); + if (candidate != null) { + pool(candidate); + } + } + } + } else if ("reconCount".equals(field)) { + int count = Integer.parseInt(value); + + for (int i = 0; i < count; i++) { + line = reader2.readLine(); + if (line != null) { + Recon recon = Recon.loadStreaming(line, this); + if (recon != 
null) { + pool(recon); + } + } + } + } + } + } + + public void write(JSONWriter writer, Properties options) + throws JSONException { + + writer.object(); + + writer.key("reconCandidates"); + writer.object(); + for (Entry entry : candidates.entrySet()) { + writer.key(entry.getKey()); + entry.getValue().write(writer, options); + } + writer.endObject(); + + writer.key("recons"); + writer.object(); + for (Entry entry : recons.entrySet()) { + writer.key(entry.getKey().toString()); + entry.getValue().write(writer, options); + } + writer.endObject(); + + writer.endObject(); + } +} diff --git a/main/tests/server/src/com/google/gridworks/tests/GridworksServletStub.java b/main/tests/server/src/com/google/gridworks/tests/GridworksServletStub.java deleted file mode 100644 index 345dd761e..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/GridworksServletStub.java +++ /dev/null @@ -1,46 +0,0 @@ -package com.google.gridworks.tests; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import com.google.gridworks.GridworksServlet; -import com.google.gridworks.commands.Command; - -/** - * Exposes protected methods of com.google.gridworks.GridworksServlet as public for unit testing - * - */ -public class GridworksServletStub extends GridworksServlet { - - //requirement of extending HttpServlet, not required for testing - private static final long serialVersionUID = 1L; - - public void wrapService(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException{ - super.service(request, response); - } - - public String wrapGetCommandName(HttpServletRequest request){ - return super.getCommandKey(request); - } - - //-------------------helper methods-------------- - /** - * Helper method for inserting a mock object - * @param commandName - * @param command - */ - public void insertCommand(String commandName, Command command ){ 
- registerOneCommand("core/" + commandName, command); - } - - /** - * Helper method for clearing up after testing - * @param commandName - */ - public void removeCommand( String commandName ){ - unregisterCommand(commandName); - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/GridworksServletTests.java b/main/tests/server/src/com/google/gridworks/tests/GridworksServletTests.java deleted file mode 100644 index 7f56e10f9..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/GridworksServletTests.java +++ /dev/null @@ -1,186 +0,0 @@ -package com.google.gridworks.tests; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import com.google.gridworks.commands.Command; - -public class GridworksServletTests extends GridworksTest { - - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - //System under test - GridworksServletStub SUT = null; - - //variables - final static private String TEST_COMMAND_NAME = "test-command"; - final static private String TEST_COMMAND_PATH = "/command/core/test-command/foobar"; - final static private String BAD_COMMAND_PATH = "/command-does-not-exist"; - - final static private String POST = "POST"; - final static private String GET = "GET"; - - // mocks - HttpServletRequest request = null; - HttpServletResponse response = null; - Command command = null; - - - @BeforeMethod - public void SetUp() throws ServletException { - request = mock(HttpServletRequest.class); 
- response = mock(HttpServletResponse.class); - command = mock(Command.class); - - SUT = new GridworksServletStub(); - SUT.insertCommand(TEST_COMMAND_NAME,command); //inject mock into command container - } - - @AfterMethod - public void TearDown() { - SUT.removeCommand(TEST_COMMAND_NAME); //remove mock to clean command container - SUT = null; - - request = null; - response = null; - command = null; - } - - //-------------------AutoSaveTimerTask tests----------- - //TODO would need to mock Timer and inject it into GridworksServlet. Also need to deal with ProjectManager.singleton - //-------------------init tests------------------------ - //TODO need to stub super.init(), mock Timer and inject it into GridworksServlet - //-------------------destroy tests--------------------- - //TODO need to mock Timer and inject it into GridworksServlet. Also need to deal with ProjectManager.singleton - - //--------------------doGet tests---------------------- - @Test - public void doGetRegressionTest(){ - whenGetCommandNameThenReturn(TEST_COMMAND_PATH); - whenGetMethodThenReturn(GET); - - try { - SUT.wrapService(request, response); - } catch (ServletException e) { - Assert.fail(); - } catch (IOException e) { - Assert.fail(); - } - - verifyGetCommandNameCalled(); - try { - verify(command,times(1)).doGet(request, response); - } catch (ServletException e) { - Assert.fail(); - } catch (IOException e) { - Assert.fail(); - } - } - - @Test - public void doGetReturnsError404WhenCommandNotFound(){ - whenGetCommandNameThenReturn(BAD_COMMAND_PATH); - whenGetMethodThenReturn(GET); - - try { - SUT.wrapService(request, response); - } catch (ServletException e) { - Assert.fail(); - } catch (IOException e) { - Assert.fail(); - } - - verifyGetCommandNameCalled(); - verifyError404Called(); - - } - - //----------------doPost tests------------------------- - @Test - public void doPostRegressionTest(){ - whenGetCommandNameThenReturn(TEST_COMMAND_PATH); - whenGetMethodThenReturn(POST); - - try { - 
SUT.wrapService(request, response); - } catch (ServletException e) { - Assert.fail(); - } catch (IOException e) { - Assert.fail(); - } - - verifyGetCommandNameCalled(); - try { - verify(command,times(1)).doPost(request, response); - } catch (ServletException e) { - Assert.fail(); - } catch (IOException e) { - Assert.fail(); - } - } - - @Test - public void doPostReturns404WhenCommandNotFound(){ - whenGetCommandNameThenReturn(BAD_COMMAND_PATH); - whenGetMethodThenReturn(POST); - - try { - SUT.wrapService(request, response); - } catch (ServletException e) { - Assert.fail(); - } catch (IOException e) { - Assert.fail(); - } - - verifyGetCommandNameCalled(); - verifyError404Called(); - } - - //----------------getCommandName tests---------------- - - @Test - public void getCommandNameHandlesBadCommandName(){ - - when(request.getPathInfo()).thenReturn("/command/this-command-has-no-trailing-slash"); - - Assert.assertEquals("this-command-has-no-trailing-slash", SUT.wrapGetCommandName(request)); - - verify(request, times(1)).getPathInfo(); - } - - //------------helpers - protected void whenGetCommandNameThenReturn(String commandName){ - when(request.getPathInfo()).thenReturn(commandName); - } - protected void whenGetMethodThenReturn(String method){ - when(request.getMethod()).thenReturn(method); - } - protected void verifyGetCommandNameCalled(){ - verify(request,times(2)).getPathInfo(); - } - protected void verifyError404Called(){ - try { - verify(response,times(1)).sendError(404); - } catch (IOException e) { - Assert.fail(); - } - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/GridworksTest.java b/main/tests/server/src/com/google/gridworks/tests/GridworksTest.java deleted file mode 100644 index c710266d9..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/GridworksTest.java +++ /dev/null @@ -1,52 +0,0 @@ -package com.google.gridworks.tests; - -import org.slf4j.Logger; -import org.testng.Assert; -import org.testng.annotations.BeforeSuite; - 
-import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; - -public class GridworksTest { - - protected Logger logger; - - @BeforeSuite - public void init() { - System.setProperty("log4j.configuration", "tests.log4j.properties"); - } - - public static void assertProjectCreated(Project project, int numCols, int numRows) { - Assert.assertNotNull(project); - Assert.assertNotNull(project.columnModel); - Assert.assertNotNull(project.columnModel.columns); - Assert.assertEquals(project.columnModel.columns.size(), numCols); - Assert.assertNotNull(project.rows); - Assert.assertEquals(project.rows.size(), numRows); - } - - public void log(Project project) { - // some quick and dirty debugging - StringBuilder sb = new StringBuilder(); - for(Column c : project.columnModel.columns){ - sb.append(c.getName()); - sb.append("; "); - } - logger.info(sb.toString()); - for(Row r : project.rows){ - sb = new StringBuilder(); - for(int i = 0; i < r.cells.size(); i++){ - Cell c = r.getCell(i); - if(c != null){ - sb.append(c.value); - sb.append("; "); - }else{ - sb.append("null; "); - } - } - logger.info(sb.toString()); - } - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/ProjectManagerStub.java b/main/tests/server/src/com/google/gridworks/tests/ProjectManagerStub.java deleted file mode 100644 index 745021415..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/ProjectManagerStub.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.google.gridworks.tests; - -import java.io.IOException; -import java.io.InputStream; - -import org.apache.tools.tar.TarOutputStream; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.history.HistoryEntryManager; -import com.google.gridworks.model.Project; - -/** - * Stub used to make protected methods public for testing - * - */ -public class 
ProjectManagerStub extends ProjectManager { - - public ProjectManagerStub(){ - super(); - } - - @Override - public void deleteProject(long projectID) { - // empty - - } - - @Override - public void exportProject(long projectId, TarOutputStream tos) throws IOException { - // empty - } - - @Override - public HistoryEntryManager getHistoryEntryManager() { - // empty - return null; - } - - @Override - public void importProject(long projectID, InputStream inputStream, boolean gziped) throws IOException { - // empty - } - - @Override - protected Project loadProject(long id) { - // empty - return null; - } - - @Override - public boolean loadProjectMetadata(long projectID) { - // empty - return false; - } - - @Override - public void saveMetadata(ProjectMetadata metadata, long projectId) throws Exception { - // empty - - } - - @Override - public void saveProject(Project project) { - // empty - } - - //Overridden to make public for testing - @Override - public void saveProjects(boolean allModified){ - super.saveProjects(allModified); - } - - @Override - protected void saveWorkspace() { - // empty - } - -} diff --git a/main/tests/server/src/com/google/gridworks/tests/ProjectManagerTests.java b/main/tests/server/src/com/google/gridworks/tests/ProjectManagerTests.java deleted file mode 100644 index 2a403a48c..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/ProjectManagerTests.java +++ /dev/null @@ -1,212 +0,0 @@ -package com.google.gridworks.tests; - -import java.util.Date; -import java.util.GregorianCalendar; - -import org.mockito.Mockito; -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.model.Project; -import com.google.gridworks.tests.model.ProjectStub; - -import static org.mockito.Mockito.spy; -import 
static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verifyNoMoreInteractions; -import static org.mockito.Mockito.never; - -public class ProjectManagerTests extends GridworksTest { - ProjectManagerStub pm; - ProjectManagerStub SUT; - Project project; - ProjectMetadata metadata; - - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - @BeforeMethod - public void SetUp(){ - pm = new ProjectManagerStub(); - SUT = spy(pm); - project = mock(Project.class); - metadata = mock(ProjectMetadata.class); - } - - @AfterMethod - public void TearDown(){ - metadata = null; - project = null; - SUT = null; - pm = null; - } - - @Test - public void canRegisterProject(){ - - SUT.registerProject(project, metadata); - - AssertProjectRegistered(); - - verifyNoMoreInteractions(project); - verifyNoMoreInteractions(metadata); - } - - //TODO test registerProject in race condition - - @Test - public void canEnsureProjectSave(){ - whenGetSaveTimes(project, metadata); - registerProject(); - - //run test - SUT.ensureProjectSaved(project.id); - - //assert and verify - AssertProjectRegistered(); - try { - verify(SUT, times(1)).saveMetadata(metadata, project.id); - } catch (Exception e) { - Assert.fail(); - } - this.verifySaveTimeCompared(1); - verify(SUT, times(1)).saveProject(project); - - //ensure end - verifyNoMoreInteractions(project); - verifyNoMoreInteractions(metadata); - } - - //TODO test ensureProjectSave in race condition - - @Test - public void canSaveAllModified(){ - whenGetSaveTimes(project, metadata); //5 minute difference - registerProject(project, metadata); - - //add a second project to the cache - Project project2 = spy(new ProjectStub(2)); - ProjectMetadata metadata2 = mock(ProjectMetadata.class); - whenGetSaveTimes(project2, metadata2, 10); //not modified since the last save but within 30 seconds 
flush limit - registerProject(project2, metadata2); - - //check that the two projects are not the same - Assert.assertFalse(project.id == project2.id); - - SUT.save(true); - - verifySaved(project, metadata); - - verifySaved(project2, metadata2); - - verify(SUT, times(1)).saveWorkspace(); - } - - @Test - public void canFlushFromCache(){ - - whenGetSaveTimes(project, metadata, -10 );//already saved (10 seconds before) - registerProject(project, metadata); - Assert.assertSame(SUT.getProject(0), project); - - SUT.save(true); - - verify(metadata, times(1)).getModified(); - verify(project, times(2)).getLastSave(); - verify(SUT, never()).saveProject(project); - Assert.assertEquals(SUT.getProject(0), null); - verifyNoMoreInteractions(project); - verifyNoMoreInteractions(metadata); - - verify(SUT, times(1)).saveWorkspace(); - } - - @Test - public void cannotSaveWhenBusy(){ - registerProject(); - SUT.setBusy(true); - - SUT.save(false); - - verify(SUT, never()).saveProjects(Mockito.anyBoolean()); - verify(SUT, never()).saveWorkspace(); - verifyNoMoreInteractions(project); - verifyNoMoreInteractions(metadata); - } - - //TODO test canSaveAllModifiedWithRaceCondition - - @Test - public void canSaveSomeModified(){ - registerProject(); - whenGetSaveTimes(project, metadata ); - - SUT.save(false); //not busy - - verifySaved(project, metadata); - verify(SUT, times(1)).saveWorkspace(); - - } - //TODO test canSaveAllModifiedWithRaceCondition - - //-------------helpers------------- - - protected void registerProject(){ - this.registerProject(project, metadata); - } - protected void registerProject(Project proj, ProjectMetadata meta){ - SUT.registerProject(proj, meta); - } - - protected void AssertProjectRegistered(){ - Assert.assertEquals(SUT.getProject(project.id), project); - Assert.assertEquals(SUT.getProjectMetadata(project.id), metadata); - } - - protected void whenGetSaveTimes(Project proj, ProjectMetadata meta){ - whenGetSaveTimes(proj, meta, 5); - } - protected void 
whenGetSaveTimes(Project proj, ProjectMetadata meta, int secondsDifference){ - whenProjectGetLastSave(proj); - whenMetadataGetModified(meta, secondsDifference); - } - - protected void whenProjectGetLastSave(Project proj){ - Date projectLastSaveDate = new GregorianCalendar(1970,01,02,00,30,00).getTime(); - when(proj.getLastSave()).thenReturn(projectLastSaveDate); - } - - protected void whenMetadataGetModified(ProjectMetadata meta){ - whenMetadataGetModified(meta, 5*60); - } - protected void whenMetadataGetModified(ProjectMetadata meta, int secondsDifference){ - Date metadataModifiedDate = new GregorianCalendar(1970,01,02,00, 30, secondsDifference).getTime(); - when(meta.getModified()).thenReturn(metadataModifiedDate); - } - - protected void verifySaveTimeCompared(int times){ - verifySaveTimeCompared(project, metadata, times); - } - protected void verifySaveTimeCompared(Project project, ProjectMetadata metadata, int times){ - verify(metadata, times(times)).getModified(); - verify(project, times(times)).getLastSave(); - } - - protected void verifySaved(Project proj, ProjectMetadata meta){ - verify(meta, times(1)).getModified(); - verify(proj, times(2)).getLastSave(); - verify(SUT, times(1)).saveProject(proj); - - verifyNoMoreInteractions(proj); - verifyNoMoreInteractions(meta); - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/commands/CommandStub.java b/main/tests/server/src/com/google/gridworks/tests/commands/CommandStub.java deleted file mode 100644 index 0422c5972..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/commands/CommandStub.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.google.gridworks.tests.commands; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONException; -import org.json.JSONObject; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.commands.Command; -import com.google.gridworks.model.Project; - -/** - * Implementation of 
abstract class for testing Exposes protected members as public - */ -public class CommandStub extends Command { - - public Project wrapGetProject(HttpServletRequest request) - throws ServletException { - return getProject(request); - } - - public JSONObject wrapGetEngineConfig(HttpServletRequest request) - throws JSONException { - return getEngineConfig(request); - } - - public Engine wrapGetEngine(HttpServletRequest request, Project project) - throws Exception { - return getEngine(request, project); - } - - public int wrapGetIntegerParameter(HttpServletRequest request, String name,int def) { - return getIntegerParameter(request, name, def); - } - - public JSONObject wrapGetJsonParameter(HttpServletRequest request,String name) { - return getJsonParameter(request, name); - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/commands/CommandTests.java b/main/tests/server/src/com/google/gridworks/tests/commands/CommandTests.java deleted file mode 100644 index 5783bdfb9..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/commands/CommandTests.java +++ /dev/null @@ -1,318 +0,0 @@ -package com.google.gridworks.tests.commands; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; - -import org.json.JSONException; -import org.json.JSONObject; -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.model.Project; -import com.google.gridworks.tests.GridworksTest; - -public class CommandTests extends GridworksTest { - - @BeforeTest - public void init() { - 
logger = LoggerFactory.getLogger(this.getClass()); - } - - CommandStub SUT = null; - HttpServletRequest request = null; - ProjectManager projectManager = null; - Project project = null; - - @BeforeMethod - public void SetUp() { - SUT = new CommandStub(); - request = mock(HttpServletRequest.class); - projectManager = mock(ProjectManager.class); - project = mock(Project.class); - } - - @AfterMethod - public void TearDown() { - SUT = null; - request = null; - projectManager = null; - project = null; - } - - // -----------------getProject tests------------ - - @Test - public void getProjectThrowsWithNullParameter() { - try { - SUT.wrapGetProject(null); - Assert.fail(); // should throw exception before this - } catch (IllegalArgumentException e) { - // expected - } catch (Exception e) { - Assert.fail(); - } - } - - @Test - public void getProjectThrowsIfResponseHasNoOrBrokenProjectParameter() { - when(request.getParameter("project")).thenReturn(""); // null - try { - SUT.wrapGetProject(request); - } catch (ServletException e) { - // expected - } catch (Exception e) { - Assert.fail(); - } - verify(request, times(1)).getParameter("project"); - } - - // -----------------getEngineConfig tests----------------- - @Test - public void getEngineConfigThrowsWithNullParameter() { - try { - SUT.wrapGetEngineConfig(null); - Assert.fail(); - } catch (IllegalArgumentException e) { - // expected - } catch (Exception e) { - Assert.fail(); - } - } - - @Test - public void getEngineConfigReturnsNullWithNullEngineParameter() { - when(request.getParameter("engine")).thenReturn(null); - try { - Assert.assertNull(SUT.wrapGetEngineConfig(request)); - } catch (JSONException e) { - Assert.fail(); - } catch (Exception e) { - Assert.fail(); - } - } - - @Test - public void getEngineConfigReturnsNullWithEmptyOrBadParameterValue() { - when(request.getParameter("engine")).thenReturn("sdfasdfas"); - - try { - Assert.assertNull( SUT.wrapGetEngineConfig(request) ); - } catch (JSONException e) { - 
Assert.fail(); - } - - verify(request, times(1)).getParameter("engine"); - } - - @Test - public void getEngineConfigRegressionTest() { - when(request.getParameter("engine")).thenReturn("{\"hello\":\"world\"}"); - JSONObject o = null; - try { - o = SUT.wrapGetEngineConfig(request); - Assert.assertEquals("world", o.getString("hello")); - } catch (JSONException e) { - Assert.fail(); - } catch (Exception e) { - Assert.fail(); - } - verify(request, times(1)).getParameter("engine"); - } - - // -----------------getEngine tests---------------------- - @Test - public void getEngineThrowsOnNullParameter() { - try { - SUT.wrapGetEngine(null, null); - } catch (IllegalArgumentException e) { - // expected - } catch (Exception e) { - Assert.fail(); - } - - try { - SUT.wrapGetEngine(null, project); - } catch (IllegalArgumentException e) { - // expected - } catch (Exception e) { - Assert.fail(); - } - - try { - SUT.wrapGetEngine(request, null); - } catch (IllegalArgumentException e) { - // expected - } catch (Exception e) { - Assert.fail(); - } - } - - @Test - public void getEngineRegressionTest() { - // TODO refactor getEngine to use dependency injection, so a mock Engine - // object can be used. - - Engine engine = null; - when(request.getParameter("engine")).thenReturn("{\"hello\":\"world\"}"); - - try { - engine = SUT.wrapGetEngine(request, project); - Assert.assertNotNull(engine); - } catch (Exception e) { - Assert.fail(); - } - - verify(request, times(1)).getParameter("engine"); - // JSON configuration doesn't have 'facets' key or 'INCLUDE_DEPENDENT' - // key, so there should be no further action - // Engine._facets is protected so can't test that it is of zero length. 
- } - - // ------------------ - @Test - public void getIntegerParameterWithNullParameters() { - // all null - try { - SUT.wrapGetIntegerParameter(null, null, 0); - Assert.fail(); - } catch (IllegalArgumentException e) { - // expected - } - - // request null - try { - SUT.wrapGetIntegerParameter(null, "name", 0); - Assert.fail(); - } catch (IllegalArgumentException e) { - // expected - } - } - - @Test - public void getIntegerParametersWithIncorrectParameterName() { - - when(request.getParameter(null)).thenReturn(null); - when(request.getParameter("incorrect")).thenReturn(null); - - // name null - try { - int returned = SUT.wrapGetIntegerParameter(request, null, 5); - Assert.assertEquals(5, returned); - } catch (IllegalArgumentException e) { - Assert.fail(); - } - - // name incorrect - try { - int returned = SUT.wrapGetIntegerParameter(request, "incorrect", 5); - Assert.assertEquals(5, returned); - } catch (IllegalArgumentException e) { - Assert.fail(); - } - - verify(request, times(1)).getParameter(null); - verify(request, times(1)).getParameter("incorrect"); - } - - @Test - public void getIntegerParametersRegressionTest() { - when(request.getParameter("positivenumber")).thenReturn("22"); - when(request.getParameter("zeronumber")).thenReturn("0"); - when(request.getParameter("negativenumber")).thenReturn("-40"); - - // positive - try { - int returned = SUT.wrapGetIntegerParameter(request,"positivenumber", 5); - Assert.assertEquals(22, returned); - } catch (IllegalArgumentException e) { - Assert.fail(); - } - - // zero - try { - int returned = SUT.wrapGetIntegerParameter(request, "zeronumber", 5); - Assert.assertEquals(0, returned); - } catch (IllegalArgumentException e) { - Assert.fail(); - } - - // negative - try { - int returned = SUT.wrapGetIntegerParameter(request, - "negativenumber", 5); - Assert.assertEquals(-40, returned); - } catch (IllegalArgumentException e) { - Assert.fail(); - } - - verify(request, times(1)).getParameter("positivenumber"); - 
verify(request, times(1)).getParameter("zeronumber"); - verify(request, times(1)).getParameter("negativenumber"); - } - - // ---------------------getJsonParameter tests---------------- - @Test - public void getJsonParameterWithNullParameters() { - when(request.getParameter(null)).thenReturn(null); - when(request.getParameter("")).thenReturn(null); - - try { - SUT.wrapGetJsonParameter(null, null); - Assert.fail(); - } catch (IllegalArgumentException e) { - // expected - } - - Assert.assertNull(SUT.wrapGetJsonParameter(request, null)); - - try { - SUT.wrapGetJsonParameter(null, "test"); - } catch (IllegalArgumentException e) { - // expected - } - - Assert.assertNull(SUT.wrapGetJsonParameter(request, "")); - - verify(request, times(1)).getParameter(null); - verify(request, times(1)).getParameter(""); - } - - @Test - public void getJsonParameterRegressionTest() { - when(request.getParameter("test")).thenReturn("{\"foo\":\"bar\"}"); - - JSONObject o = SUT.wrapGetJsonParameter(request, "test"); - Assert.assertNotNull(o); - try { - Assert.assertEquals("bar", o.getString("foo")); - } catch (JSONException e) { - Assert.fail(); - } - - verify(request, times(1)).getParameter("test"); - } - - @Test - public void getJsonParameterWithMalformedJson() { - when(request.getParameter("test")).thenReturn("brokenJSON"); - - try { - Assert.assertNull(SUT.wrapGetJsonParameter(request, "test")); - } catch (Exception e) { - Assert.fail(); - } - - verify(request, times(1)).getParameter("test"); - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/commands/util/CancelProcessesCommandTests.java b/main/tests/server/src/com/google/gridworks/tests/commands/util/CancelProcessesCommandTests.java deleted file mode 100644 index f9bb5390f..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/commands/util/CancelProcessesCommandTests.java +++ /dev/null @@ -1,212 +0,0 @@ -package com.google.gridworks.tests.commands.util; - -import static org.mockito.Matchers.anyLong; -import 
static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.io.IOException; -import java.io.PrintWriter; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.commands.history.CancelProcessesCommand; -import com.google.gridworks.model.Project; -import com.google.gridworks.process.ProcessManager; -import com.google.gridworks.tests.GridworksTest; - -public class CancelProcessesCommandTests extends GridworksTest { - - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - // System Under Test - CancelProcessesCommand SUT = null; - - // variables - long PROJECT_ID_LONG = 1234; - String PROJECT_ID = "1234"; - - // mocks - HttpServletRequest request = null; - HttpServletResponse response = null; - ProjectManager projMan = null; - Project proj = null; - ProcessManager processMan = null; - PrintWriter pw = null; - - @BeforeMethod - public void SetUp() { - projMan = mock(ProjectManager.class); - ProjectManager.singleton = projMan; - proj = mock(Project.class); - processMan = mock(ProcessManager.class); - pw = mock(PrintWriter.class); - - request = mock(HttpServletRequest.class); - response = mock(HttpServletResponse.class); - SUT = new CancelProcessesCommand(); - } - - @AfterMethod - public void TearDown() { - SUT = null; - - projMan = null; - ProjectManager.singleton = null; - proj = null; - pw = null; - request = null; - response = null; - } - - @Test - public void doPostFailsThrowsWithNullParameters() { - - // both parameters null - try 
{ - SUT.doPost(null, null); - Assert.fail(); // should have thrown exception by this point - } catch (IllegalArgumentException e){ - //expected - } catch (ServletException e) { - Assert.fail(); - } catch (Exception e) { - Assert.fail(); - } - - // request is null - try { - SUT.doPost(null, response); - Assert.fail(); // should have thrown exception by this point - } catch (IllegalArgumentException e){ - //expected - } catch (ServletException e) { - Assert.fail(); - } catch (Exception e) { - Assert.fail(); - } - - // response parameter null - try { - SUT.doPost(request, null); - Assert.fail(); // should have thrown exception by this point - } catch (IllegalArgumentException e){ - // expected - } catch (ServletException e) { - Assert.fail(); - } catch (Exception e) { - Assert.fail(); - } - } - - /** - * Contract for a complete working post - */ - @Test - public void doPostRegressionTest() { - - // mock dependencies - when(request.getParameter("project")).thenReturn(PROJECT_ID); - when(projMan.getProject(anyLong())).thenReturn(proj); - when(proj.getProcessManager()).thenReturn(processMan); - try { - when(response.getWriter()).thenReturn(pw); - } catch (IOException e1) { - Assert.fail(); - } - - // run - try { - SUT.doPost(request, response); - } catch (ServletException e) { - Assert.fail(); - } catch (IOException e) { - Assert.fail(); - } - - // verify - verify(request, times(1)).getParameter("project"); - verify(projMan, times(1)).getProject(PROJECT_ID_LONG); - - verify(processMan, times(1)).cancelAll(); - verify(response, times(1)).setCharacterEncoding("UTF-8"); - verify(response, times(1)) - .setHeader("Content-Type", "application/json"); - verify(proj, times(1)).getProcessManager(); - try { - verify(response, times(1)).getWriter(); - } catch (IOException e) { - Assert.fail(); - } - verify(pw, times(1)).write("{ \"code\" : \"ok\" }"); - } - - @Test - public void doPostThrowsIfCommand_getProjectReturnsNull(){ - // mock dependencies - 
when(request.getParameter("project")).thenReturn(PROJECT_ID); - when(projMan.getProject(anyLong())) - .thenReturn(null); - - // run - try { - SUT.doPost(request, response); - } catch (ServletException e) { - //expected - } catch (IOException e) { - Assert.fail(); - } - - // verify - verify(request, times(1)).getParameter("project"); - verify(projMan, times(1)).getProject(PROJECT_ID_LONG); - } - - @Test - public void doPostCatchesExceptionFromWriter(){ - String ERROR_MESSAGE = "hello world"; - - // mock dependencies - when(request.getParameter("project")).thenReturn(PROJECT_ID); - when(projMan.getProject(anyLong())).thenReturn(proj); - when(proj.getProcessManager()).thenReturn(processMan); - try { - when(response.getWriter()).thenThrow(new IllegalStateException(ERROR_MESSAGE)) - .thenReturn(pw); - } catch (IOException e) { - Assert.fail(); - } - - // run - try { - SUT.doPost(request, response); - } catch (ServletException e) { - Assert.fail(); - } catch (IOException e) { - Assert.fail(); - } - - verify(request, times(1)).getParameter("project"); - verify(projMan, times(1)).getProject(PROJECT_ID_LONG); - - verify(processMan, times(1)).cancelAll(); - verify(response, times(3)).setCharacterEncoding("UTF-8"); - //omitted other verifications for brevity. 
- //assumption is that expecting response.setCharacterEncoding times(3) - //implies it has Command.respondException has been called as expected - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/exporters/CsvExporterTests.java b/main/tests/server/src/com/google/gridworks/tests/exporters/CsvExporterTests.java deleted file mode 100644 index dee747a54..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/exporters/CsvExporterTests.java +++ /dev/null @@ -1,186 +0,0 @@ -package com.google.gridworks.tests.exporters; - -import java.io.IOException; -import java.io.StringWriter; -import java.util.Properties; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.times; - -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import com.google.gridworks.browsing.Engine; -import com.google.gridworks.exporters.CsvExporter; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.ModelException; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.tests.GridworksTest; - -public class CsvExporterTests extends GridworksTest { - - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - //dependencies - StringWriter writer; - Project project; - Engine engine; - Properties options; - - //System Under Test - CsvExporter SUT; - - @BeforeMethod - public void SetUp(){ - SUT = new CsvExporter(); - writer = new StringWriter(); - project = new Project(); - engine = new Engine(project); - options = mock(Properties.class); - } - - @AfterMethod - public void TearDown(){ - SUT = null; - writer = null; - project = null; - 
engine = null; - options = null; - } - - @Test - public void exportSimpleCsv(){ - CreateGrid(2, 2); - - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - Assert.assertEquals(writer.toString(), "column0,column1\n" + - "row0cell0,row0cell1\n" + - "row1cell0,row1cell1\n"); - - } - - @Test - public void exportSimpleCsvNoHeader(){ - CreateGrid(2, 2); - when(options.getProperty("printColumnHeader")).thenReturn("false"); - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - Assert.assertEquals(writer.toString(), "row0cell0,row0cell1\n" + - "row1cell0,row1cell1\n"); - - verify(options,times(2)).getProperty("printColumnHeader"); - } - - @Test - public void exportCsvWithLineBreaks(){ - CreateGrid(3,3); - - project.rows.get(1).cells.set(1, new Cell("line\n\n\nbreak", null)); - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - Assert.assertEquals(writer.toString(), "column0,column1,column2\n" + - "row0cell0,row0cell1,row0cell2\n" + - "row1cell0,\"line\n\n\nbreak\",row1cell2\n" + - "row2cell0,row2cell1,row2cell2\n"); - } - - @Test - public void exportCsvWithComma(){ - CreateGrid(3,3); - - project.rows.get(1).cells.set(1, new Cell("with, comma", null)); - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - Assert.assertEquals(writer.toString(), "column0,column1,column2\n" + - "row0cell0,row0cell1,row0cell2\n" + - "row1cell0,\"with, comma\",row1cell2\n" + - "row2cell0,row2cell1,row2cell2\n"); - } - - @Test - public void exportCsvWithQuote(){ - CreateGrid(3,3); - - project.rows.get(1).cells.set(1, new Cell("line has \"quote\"", null)); - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - Assert.assertEquals(writer.toString(), "column0,column1,column2\n" + - "row0cell0,row0cell1,row0cell2\n" + - 
"row1cell0,\"line has \"\"quote\"\"\",row1cell2\n" + - "row2cell0,row2cell1,row2cell2\n"); - } - - @Test - public void exportCsvWithEmptyCells(){ - CreateGrid(3,3); - - project.rows.get(1).cells.set(1, null); - project.rows.get(2).cells.set(0, null); - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - Assert.assertEquals(writer.toString(), "column0,column1,column2\n" + - "row0cell0,row0cell1,row0cell2\n" + - "row1cell0,,row1cell2\n" + - ",row2cell1,row2cell2\n"); - } - - //helper methods - - protected void CreateColumns(int noOfColumns){ - for(int i = 0; i < noOfColumns; i++){ - try { - project.columnModel.addColumn(i, new Column(i, "column" + i), true); - } catch (ModelException e1) { - Assert.fail("Could not create column"); - } - } - } - - protected void CreateGrid(int noOfRows, int noOfColumns){ - CreateColumns(noOfColumns); - - for(int i = 0; i < noOfRows; i++){ - Row row = new Row(noOfColumns); - for(int j = 0; j < noOfColumns; j++){ - row.cells.add(new Cell("row" + i + "cell" + j, null)); - } - project.rows.add(row); - } - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/exporters/TsvExporterTests.java b/main/tests/server/src/com/google/gridworks/tests/exporters/TsvExporterTests.java deleted file mode 100644 index 2523b7e9b..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/exporters/TsvExporterTests.java +++ /dev/null @@ -1,188 +0,0 @@ -package com.google.gridworks.tests.exporters; - -import java.io.IOException; -import java.io.StringWriter; -import java.util.Properties; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - 
-import com.google.gridworks.browsing.Engine; -import com.google.gridworks.exporters.CsvExporter; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Column; -import com.google.gridworks.model.ModelException; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.tests.GridworksTest; - -public class TsvExporterTests extends GridworksTest { - - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - //dependencies - StringWriter writer; - Project project; - Engine engine; - Properties options; - - //System Under Test - CsvExporter SUT; - - @BeforeMethod - public void SetUp(){ - SUT = new CsvExporter('\t');//new TsvExporter(); - writer = new StringWriter(); - project = new Project(); - engine = new Engine(project); - options = mock(Properties.class); - } - - @AfterMethod - public void TearDown(){ - SUT = null; - writer = null; - project = null; - engine = null; - options = null; - } - - @Test - public void exportSimpleTsv(){ - CreateGrid(2, 2); - - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - Assert.assertEquals(writer.toString(), "column0\tcolumn1\n" + - "row0cell0\trow0cell1\n" + - "row1cell0\trow1cell1\n"); - - } - - @Test - public void exportSimpleTsvNoHeader(){ - CreateGrid(2, 2); - when(options.getProperty("printColumnHeader")).thenReturn("false"); - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - Assert.assertEquals(writer.toString(), "row0cell0\trow0cell1\n" + - "row1cell0\trow1cell1\n"); - - verify(options,times(2)).getProperty("printColumnHeader"); - } - - @Test - public void exportTsvWithLineBreaks(){ - CreateGrid(3,3); - - project.rows.get(1).cells.set(1, new Cell("line\n\n\nbreak", null)); - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - 
Assert.assertEquals(writer.toString(), "column0\tcolumn1\tcolumn2\n" + - "row0cell0\trow0cell1\trow0cell2\n" + - "row1cell0\t\"line\n\n\nbreak\"\trow1cell2\n" + - "row2cell0\trow2cell1\trow2cell2\n"); - } - - @Test - public void exportTsvWithComma(){ - CreateGrid(3,3); - - project.rows.get(1).cells.set(1, new Cell("with\t tab", null)); - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - Assert.assertEquals(writer.toString(), "column0\tcolumn1\tcolumn2\n" + - "row0cell0\trow0cell1\trow0cell2\n" + - "row1cell0\t\"with\t tab\"\trow1cell2\n" + - "row2cell0\trow2cell1\trow2cell2\n"); - } - - @Test - public void exportTsvWithQuote(){ - CreateGrid(3,3); - - project.rows.get(1).cells.set(1, new Cell("line has \"quote\"", null)); - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - Assert.assertEquals(writer.toString(), "column0\tcolumn1\tcolumn2\n" + - "row0cell0\trow0cell1\trow0cell2\n" + - "row1cell0\t\"line has \"\"quote\"\"\"\trow1cell2\n" + - "row2cell0\trow2cell1\trow2cell2\n"); - } - - @Test - public void exportTsvWithEmptyCells(){ - CreateGrid(3,3); - - project.rows.get(1).cells.set(1, null); - project.rows.get(2).cells.set(0, null); - try { - SUT.export(project, options, engine, writer); - } catch (IOException e) { - Assert.fail(); - } - - Assert.assertEquals(writer.toString(), "column0\tcolumn1\tcolumn2\n" + - "row0cell0\trow0cell1\trow0cell2\n" + - "row1cell0\t\trow1cell2\n" + - "\trow2cell1\trow2cell2\n"); - } - - //helper methods - - protected void CreateColumns(int noOfColumns){ - for(int i = 0; i < noOfColumns; i++){ - try { - project.columnModel.addColumn(i, new Column(i, "column" + i), true); - project.columnModel.columns.get(i).getCellIndex(); - } catch (ModelException e1) { - Assert.fail("Could not create column"); - } - } - } - - protected void CreateGrid(int noOfRows, int noOfColumns){ - CreateColumns(noOfColumns); - - for(int i = 0; i < 
noOfRows; i++){ - Row row = new Row(noOfColumns); - for(int j = 0; j < noOfColumns; j++){ - row.cells.add(new Cell("row" + i + "cell" + j, null)); - } - project.rows.add(row); - } - } -} - diff --git a/main/tests/server/src/com/google/gridworks/tests/history/HistoryTests.java b/main/tests/server/src/com/google/gridworks/tests/history/HistoryTests.java deleted file mode 100644 index 4166697b3..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/history/HistoryTests.java +++ /dev/null @@ -1,68 +0,0 @@ -package com.google.gridworks.tests.history; - -import org.mockito.Mockito; -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.times; - -import com.google.gridworks.ProjectManager; -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.history.History; -import com.google.gridworks.history.HistoryEntry; -import com.google.gridworks.model.Project; -import com.google.gridworks.tests.GridworksTest; - - -public class HistoryTests extends GridworksTest { - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - //System Under Test - History SUT; - - //dependencies - Project proj; - ProjectManager projectManager; - - @BeforeMethod - public void SetUp(){ - projectManager = mock(ProjectManager.class); - ProjectManager.singleton = projectManager; - proj = new Project(); - SUT = new History(proj); - } - - @AfterMethod - public void TearDown(){ - SUT = null; - proj = null; - } - - @Test - public void canAddEntry(){ - //local dependencies - HistoryEntry entry = mock(HistoryEntry.class); - Project project = mock(Project.class); - ProjectMetadata projectMetadata = 
mock(ProjectMetadata.class); - - when(projectManager.getProject(Mockito.anyLong())).thenReturn(project); - when(projectManager.getProjectMetadata(Mockito.anyLong())).thenReturn(projectMetadata); - - SUT.addEntry(entry); - - verify(projectManager, times(1)).getProject(Mockito.anyLong()); - verify(entry, times(1)).apply(project); - verify(projectMetadata, times(1)).updateModified(); - Assert.assertEquals(SUT.getLastPastEntries(1).get(0), entry); - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/importers/ImporterUtilitiesTests.java b/main/tests/server/src/com/google/gridworks/tests/importers/ImporterUtilitiesTests.java deleted file mode 100644 index 70fa66bfc..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/importers/ImporterUtilitiesTests.java +++ /dev/null @@ -1,125 +0,0 @@ -package com.google.gridworks.tests.importers; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import com.google.gridworks.importers.ImporterUtilities; -import com.google.gridworks.model.Cell; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.tests.GridworksTest; - -public class ImporterUtilitiesTests extends GridworksTest { - - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - @Test(enabled=false) - public void parseCellValueWithText(){ - String END_QUOTES_SHOULD_BE_RETAINED = "\"To be\" is almost always followed by \"or not to be\""; - String response = (String) ImporterUtilities.parseCellValue(END_QUOTES_SHOULD_BE_RETAINED); - Assert.assertEquals(response, END_QUOTES_SHOULD_BE_RETAINED); - } - - @Test - 
public void getIntegerOption(){ - Properties options = mock(Properties.class); - when(options.containsKey("testInteger")).thenReturn(true); - when(options.getProperty("testInteger")).thenReturn("5"); - int response = ImporterUtilities.getIntegerOption("testInteger", options, -1); - Assert.assertEquals(5, response); - verify(options, times(1)).containsKey("testInteger"); - verify(options, times(1)).getProperty("testInteger"); - } - - @Test - public void getIntegerOptionReturnsDefaultOnError(){ - Properties options = mock(Properties.class); - when(options.containsKey("testInteger")).thenReturn(true); - when(options.getProperty("testInteger")).thenReturn("notAnInteger"); - int response = ImporterUtilities.getIntegerOption("testInteger", options, -1); - Assert.assertEquals(-1, response); - verify(options, times(1)).containsKey("testInteger"); - verify(options, times(1)).getProperty("testInteger"); - } - - @Test - public void appendColumnName(){ - List columnNames = new ArrayList(); - - - ImporterUtilities.appendColumnName(columnNames, 0, "foo"); - ImporterUtilities.appendColumnName(columnNames, 1, "bar"); - Assert.assertEquals(columnNames.size(), 2); - Assert.assertEquals(columnNames.get(0), "foo"); - Assert.assertEquals(columnNames.get(1), "bar"); - } - - @Test - public void appendColumnNameFromMultipleRows(){ - List columnNames = new ArrayList(); - - ImporterUtilities.appendColumnName(columnNames, 0, "foo"); - ImporterUtilities.appendColumnName(columnNames, 0, "bar"); - Assert.assertEquals(columnNames.size(), 1); - Assert.assertEquals(columnNames.get(0), "foo bar"); - } - - @Test - public void ensureColumnsInRowExist(){ - String VALUE_1 = "value1"; - String VALUE_2 = "value2"; - Row row = new Row(2); - ArrayList columnNames = new ArrayList(2); - columnNames.add(VALUE_1); - columnNames.add(VALUE_2); - - ImporterUtilities.ensureColumnsInRowExist(columnNames, row); - - Assert.assertEquals(columnNames.size(), 2); - Assert.assertEquals(columnNames.get(0), VALUE_1); - 
Assert.assertEquals(columnNames.get(1), VALUE_2); - } - - @Test - public void ensureColumnsInRowExistDoesExpand(){ - Row row = new Row(4); - for(int i = 1; i < 5; i++) - row.cells.add(new Cell("value" + i, null)); - - ArrayList columnNames = new ArrayList(2); - - - ImporterUtilities.ensureColumnsInRowExist(columnNames, row); - - Assert.assertEquals(row.cells.size(), 4); - Assert.assertEquals(columnNames.size(), 4); - } - - @Test - public void setupColumns(){ - Project project = new Project(); - List columnNames = new ArrayList(); - columnNames.add("col1"); - columnNames.add("col2"); - columnNames.add(""); - ImporterUtilities.setupColumns(project, columnNames); - Assert.assertEquals( project.columnModel.columns.get(0).getName(), "col1" ); - Assert.assertEquals( project.columnModel.columns.get(1).getName(), "col2" ); - Assert.assertEquals( project.columnModel.columns.get(2).getName(), "Column"); - } - -} diff --git a/main/tests/server/src/com/google/gridworks/tests/importers/RdfTripleImporterTests.java b/main/tests/server/src/com/google/gridworks/tests/importers/RdfTripleImporterTests.java deleted file mode 100644 index 70306f64f..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/importers/RdfTripleImporterTests.java +++ /dev/null @@ -1,159 +0,0 @@ -package com.google.gridworks.tests.importers; - -import java.io.StringReader; -import java.util.Properties; - -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.importers.RdfTripleImporter; -import com.google.gridworks.model.Project; -import com.google.gridworks.tests.GridworksTest; - - -public class RdfTripleImporterTests extends GridworksTest { - - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - - //System Under Test - RdfTripleImporter SUT = null; - 
Project project = null; - Properties options = null; - - @BeforeMethod - public void SetUp(){ - SUT = new RdfTripleImporter(); - project = new Project(); - options = new Properties(); - options.put("base-url", "http://rdf.freebase.com"); - } - - @Test(enabled=false) - public void CanParseSingleLineTriple(){ - String sampleRdf = " ."; - StringReader reader = new StringReader(sampleRdf); - - try { - SUT.read(reader, project, new ProjectMetadata(), options); - project.update(); - } catch (Exception e) { - Assert.fail(); - } - - Assert.assertEquals(project.columnModel.columns.size(), 2); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 2); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks"); - } - - @Test - public void CanParseMultiLineTriple(){ - String sampleRdf = " .\n" + - " .\n" + - " ."; - StringReader reader = new StringReader(sampleRdf); - - try { - SUT.read(reader, project, new ProjectMetadata(), options); - project.update(); - } catch (Exception e) { - Assert.fail(); - } - - //columns - Assert.assertEquals(project.columnModel.columns.size(), 2); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album"); - - //rows - Assert.assertEquals(project.rows.size(), 3); - - //row0 - Assert.assertEquals(project.rows.get(0).cells.size(), 2); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, 
"http://rdf.freebase.com/ns/en.blood_on_the_tracks"); - - //row1 - Assert.assertEquals(project.rows.get(1).cells.size(), 2); - Assert.assertNull(project.rows.get(1).cells.get(0)); - Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); //NB triples aren't created in order they were input - Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].rowIndex, 0); - Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].cellIndex, 0); - - //row2 - Assert.assertEquals(project.rows.get(2).cells.size(), 2); - Assert.assertNull(project.rows.get(2).cells.get(0)); - Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.under_the_red_sky"); //NB triples aren't created in order they were input - Assert.assertEquals(project.recordModel.getRowDependency(2).cellDependencies[1].rowIndex, 0); - Assert.assertEquals(project.recordModel.getRowDependency(2).cellDependencies[1].cellIndex, 0); - } - - @Test - public void CanParseMultiLineMultiPredicatesTriple(){ - String sampleRdf = " .\n" + - " .\n" + - " ."; - StringReader reader = new StringReader(sampleRdf); - - try { - SUT.read(reader, project, new ProjectMetadata(), options); - project.update(); - } catch (Exception e) { - Assert.fail(); - } - - //columns - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://rdf.freebase.com/ns/music.artist.genre"); - - //rows - Assert.assertEquals(project.rows.size(), 2); - - //row0 - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); - 
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://rdf.freebase.com/ns/en.folk_rock"); - - //row1 - Assert.assertEquals(project.rows.get(1).cells.size(), 2); - Assert.assertNull(project.rows.get(1).cells.get(0)); - Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); - Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].rowIndex, 0); - Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].cellIndex, 0); - } - - @Test - public void CanParseTripleWithValue(){ - String sampleRdf = " \"Robert Zimmerman\"@en."; - StringReader reader = new StringReader(sampleRdf); - - try { - SUT.read(reader, project, new ProjectMetadata(), options); - project.update(); - } catch (Exception e) { - Assert.fail(); - } - - Assert.assertEquals(project.columnModel.columns.size(), 2); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/common.topic.alias"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 2); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "\"Robert Zimmerman\"@en"); - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/importers/TsvCsvImporterTests.java b/main/tests/server/src/com/google/gridworks/tests/importers/TsvCsvImporterTests.java deleted file mode 100644 index 7df7b5afa..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/importers/TsvCsvImporterTests.java +++ /dev/null @@ -1,549 +0,0 @@ -package com.google.gridworks.tests.importers; - -import static org.mockito.Mockito.mock; -import static 
org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.io.IOException; -import java.io.LineNumberReader; -import java.io.StringReader; -import java.util.Properties; - -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import com.google.gridworks.ProjectMetadata; -import com.google.gridworks.importers.TsvCsvImporter; -import com.google.gridworks.model.Project; -import com.google.gridworks.tests.GridworksTest; - -public class TsvCsvImporterTests extends GridworksTest { - - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - //constants - String SAMPLE_ROW = "NDB_No,Shrt_Desc,Water"; - - //System Under Test - TsvCsvImporter SUT = null; - - //mock dependencies - Project project = null; - Properties properties = null; - - - @BeforeMethod - public void SetUp(){ - SUT = new TsvCsvImporter(); - project = new Project(); //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity - properties = mock(Properties.class); - } - - @AfterMethod - public void TearDown(){ - SUT = null; - project = null; - properties = null; - } - - @Test(dataProvider = "CSV-or-null") - public void readJustColumns(String sep){ - String input = "col1,col2,col3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), 
"col3"); - } - - @Test(dataProvider = "CSV-or-null") - public void readUnseperatedData(String sep){ - String input = "value1,value2,value3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, false, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 1); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Column"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, input); - } - - @Test(dataProvider = "CSV-or-null") - public void readSimpleData_CSV_1Header_1Row(String sep){ - String input = "col1,col2,col3\n" + - "data1,data2,data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test - public void readSimpleData_TSV_1Header_1Row(){ - String input = "col1\tcol2\tcol3\n" + - "data1\tdata2\tdata3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, "\t", -1, 0, 0, 1, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - 
Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test(dataProvider = "CSV-or-null") - public void readSimpleData_CSV_1Header_1Row_GuessValues(String sep){ - String input = "col1,col2,col3\n" + - "data1,234,data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 1, true, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertTrue(project.rows.get(0).cells.get(1).value instanceof Long); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, Long.parseLong("234")); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test(dataProvider = "CSV-or-null") - public void readSimpleData_0Header_1Row(String sep){ - String input = "data1,data2,data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, false); - } catch 
(IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Column"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "Column2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "Column3"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test(groups = { }, dataProvider = "CSV-or-null") - public void readDoesTrimsLeadingTrailingWhitespace(String sep){ - String input = " data1 , data2 , data3 "; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test(dataProvider = "CSV-or-null") - public void readTrimsLeadingTrailingWhitespace(String sep){ - String input = " data1, data2, data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, 
"data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test(dataProvider = "CSV-or-null") - public void readCanAddNull(String sep){ - String input = " data1, , data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertNull(project.rows.get(0).cells.get(1)); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test(dataProvider = "CSV-or-null") - public void readSimpleData_2Header_1Row(String sep){ - String input = "col1,col2,col3\n" + - "sub1,sub2,sub3\n" + - "data1,data2,data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 2, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3 sub3"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test(dataProvider = "CSV-or-null") - public void readSimpleData_RowLongerThanHeader(String sep){ - String input = 
"col1,col2,col3\n" + - "data1,data2,data3,data4,data5,data6"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 6); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); - Assert.assertEquals(project.columnModel.columns.get(3).getName(), "Column"); - Assert.assertEquals(project.columnModel.columns.get(3).getName(), "Column"); - Assert.assertEquals(project.columnModel.columns.get(3).getName(), "Column"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 6); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - Assert.assertEquals(project.rows.get(0).cells.get(3).value, "data4"); - Assert.assertEquals(project.rows.get(0).cells.get(4).value, "data5"); - Assert.assertEquals(project.rows.get(0).cells.get(5).value, "data6"); - } - - @Test(groups = { }, dataProvider = "CSV-or-null") - public void readQuotedData(String sep){ - String input = "col1,col2,col3\n" + - "\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\",data2"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); - 
Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 2); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "\"To Be\" is often followed by \"or not To Be\""); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - } - - @Test(dataProvider = "CSV-or-null") - public void readIgnoreFirstLine(String sep){ - String input = "ignore1\n" + - "col1,col2,col3\n" + - "data1,data2,data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 1, 1, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test(dataProvider = "CSV-or-null") - public void readSkipFirstDataLine(String sep){ - String input = "col1,col2,col3\n" + - "skip1\n" + - "data1,data2,data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 1, 0, 1, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); - 
Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test(dataProvider = "CSV-or-null") - public void readIgnore3_Header2_Skip1(String sep){ - String input = "ignore1\n" + - "ignore2\n" + - "ignore3\n" + - "col1,col2,col3\n" + - "sub1,sub2,sub3\n" + - "skip1\n" + - "data1,data2,data3"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 1, 3, 2, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3 sub3"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test(groups = { }, dataProvider = "CSV-or-null") - public void readIgnore3_Header2_Skip2_limit2(String sep){ - String input = "ignore1\n" + - "ignore2\n" + - "ignore3\n" + - "col1,col2,col3\n" + - "sub1,sub2,sub3\n" + - "skip1\n" + - "skip2\n" + - "data-row1-cell1,data-row1-cell2,data-row1-cell3\n" + - "data-row2-cell1,data-row2-cell2,\n" + //missing last data point of this row on purpose - "data-row3-cell1,data-row3-cell2,data-row1-cell3"; - LineNumberReader lnReader = new LineNumberReader(new 
StringReader(input)); - try { - SUT.read(lnReader, project, sep, 2, 2, 3, 2, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3 sub3"); - Assert.assertEquals(project.rows.size(), 2); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data-row1-cell1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data-row1-cell2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data-row1-cell3"); - Assert.assertEquals(project.rows.get(1).cells.size(), 3); - Assert.assertEquals(project.rows.get(1).cells.get(0).value, "data-row2-cell1"); - Assert.assertEquals(project.rows.get(1).cells.get(1).value, "data-row2-cell2"); - Assert.assertNull(project.rows.get(1).cells.get(2)); - } - - @Test(dataProvider = "CSV-or-null") - public void ignoreQuotes(String sep){ - String input = "data1,data2\",data3,data4"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, true); - } catch (IOException e) { - Assert.fail(); - } - //Assert.assertEquals(project.columnModel.columns.size(), 4); - Assert.assertEquals(project.rows.size(), 1); - //Assert.assertEquals(project.rows.get(0).cells.size(), 4); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); - } - - @Test(groups = { }, dataProvider = "CSV-or-null") - public void readWithMultiLinedQuotedData(String sep){ - String input = "col1,col2,col3\n" + - "\"\"\"To\n Be\"\" is often followed 
by \"\"or not To\n Be\"\"\",data2"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 2); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "\"To\n Be\" is often followed by \"or not To\n Be\""); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - } - - @Test(groups = { }, dataProvider = "CSV-or-null") - public void readWithMultiLinedQuotedDataAndBlankLines(String sep){ - String input = "col1,col2,col3\n" + - "\"A line with many \n\n\n\n\n empty lines\",data2"; - LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); - try { - SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); - } catch (IOException e) { - Assert.fail(); - } - Assert.assertEquals(project.columnModel.columns.size(), 3); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 2); - Assert.assertEquals(project.rows.get(0).cells.get(0).value, "A line with many \n\n\n\n\n empty lines"); - Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); - } - - //---------------------read tests------------------------ - @Test - public void readCsvWithProperties(){ - StringReader reader = new StringReader(SAMPLE_ROW); - 
- when(properties.getProperty("separator")).thenReturn(","); - whenGetIntegerOption("ignore",properties,0); - whenGetIntegerOption("header-lines",properties,0); - whenGetIntegerOption("limit",properties,-1); - whenGetIntegerOption("skip",properties,0); - whenGetIntegerOption("ignore-quotes",properties,0); - - try { - SUT.read(reader, project, new ProjectMetadata(), properties); - } catch (Exception e) { - Assert.fail(); - } - - - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 3); - Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "NDB_No"); - Assert.assertEquals((String)project.rows.get(0).cells.get(1).value, "Shrt_Desc"); - Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "Water"); - - verify(properties, times(1)).getProperty("separator"); - verifyGetOption("ignore",properties); - verifyGetOption("header-lines",properties); - verifyGetOption("limit",properties); - verifyGetOption("skip",properties); - verifyGetOption("ignore-quotes",properties); - } - - @Test - public void readCsvWithPropertiesIgnoreQuotes(){ - String input = "data1,data2\",data3,data4"; - StringReader reader = new StringReader(input); - - when(properties.getProperty("separator")).thenReturn(","); - whenGetIntegerOption("ignore",properties,0); - whenGetIntegerOption("header-lines",properties,0); - whenGetIntegerOption("limit",properties,-1); - whenGetIntegerOption("skip",properties,0); - whenGetBooleanOption("ignore-quotes",properties,true); - - try { - SUT.read(reader, project, new ProjectMetadata(), properties); - } catch (Exception e) { - Assert.fail(); - } - - - Assert.assertEquals(project.rows.size(), 1); - Assert.assertEquals(project.rows.get(0).cells.size(), 4); - Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "data1"); - Assert.assertEquals((String)project.rows.get(0).cells.get(1).value, "data2"); - Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "data3"); - 
Assert.assertEquals((String)project.rows.get(0).cells.get(3).value, "data4"); - - verify(properties, times(1)).getProperty("separator"); - verifyGetOption("ignore",properties); - verifyGetOption("header-lines",properties); - verifyGetOption("limit",properties); - verifyGetOption("skip",properties); - verifyGetOption("ignore-quotes",properties); - } - - //--helpers-- - /** - * Used for parameterized testing for both SeparatorParser and TsvCsvParser. - */ - @DataProvider(name = "CSV-or-null") - public Object[][] CSV_or_null(){ - return new Object[][]{{ - ",", - null - }}; - } - - public void whenGetBooleanOption(String name, Properties properties, Boolean def){ - when(properties.containsKey(name)).thenReturn(true); - when(properties.getProperty(name)).thenReturn(Boolean.toString(def)); - } - - public void whenGetIntegerOption(String name, Properties properties, int def){ - when(properties.containsKey(name)).thenReturn(true); - when(properties.getProperty(name)).thenReturn(Integer.toString(def)); - } - - public void verifyGetOption(String name, Properties properties){ - verify(properties, times(1)).containsKey(name); - verify(properties, times(1)).getProperty(name); - } - -} diff --git a/main/tests/server/src/com/google/gridworks/tests/importers/XmlImportUtilitiesStub.java b/main/tests/server/src/com/google/gridworks/tests/importers/XmlImportUtilitiesStub.java deleted file mode 100644 index 802a7f0ea..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/importers/XmlImportUtilitiesStub.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.google.gridworks.tests.importers; - -import java.util.List; - -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; - -import com.google.gridworks.importers.XmlImportUtilities; -import com.google.gridworks.model.Project; - -public class XmlImportUtilitiesStub extends XmlImportUtilities { - - public List detectRecordElementWrapper(XMLStreamReader parser, String tag) throws XMLStreamException{ - 
return super.detectRecordElement(parser, tag); - } - - public void ProcessSubRecordWrapper(Project project, XMLStreamReader parser, ImportColumnGroup columnGroup, ImportRecord record) throws XMLStreamException{ - super.processSubRecord(project, parser, columnGroup, record); - } - - public void findRecordWrapper(Project project, XMLStreamReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws XMLStreamException{ - super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup); - } - - public void processRecordWrapper(Project project, XMLStreamReader parser, ImportColumnGroup rootColumnGroup) throws XMLStreamException{ - super.processRecord(project, parser, rootColumnGroup); - } - - public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex) { - super.addCell(project, columnGroup, record, columnLocalName, text); - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/importers/XmlImportUtilitiesTests.java b/main/tests/server/src/com/google/gridworks/tests/importers/XmlImportUtilitiesTests.java deleted file mode 100644 index 615709778..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/importers/XmlImportUtilitiesTests.java +++ /dev/null @@ -1,388 +0,0 @@ -package com.google.gridworks.tests.importers; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.List; - -import javax.xml.stream.FactoryConfigurationError; -import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; - -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import 
com.google.gridworks.importers.XmlImportUtilities.ImportColumn; -import com.google.gridworks.importers.XmlImportUtilities.ImportColumnGroup; -import com.google.gridworks.importers.XmlImportUtilities.ImportRecord; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.tests.GridworksTest; - - -public class XmlImportUtilitiesTests extends GridworksTest { - - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - //dependencies - Project project; - XMLStreamReader parser; - ImportColumnGroup columnGroup; - ImportRecord record; - ByteArrayInputStream inputStream; - - //System Under Test - XmlImportUtilitiesStub SUT; - - @BeforeMethod - public void SetUp(){ - SUT = new XmlImportUtilitiesStub(); - project = new Project(); - columnGroup = new ImportColumnGroup(); - record = new ImportRecord(); - } - - @AfterMethod - public void TearDown() throws IOException{ - SUT = null; - project = null; - parser = null; - columnGroup = null; - record = null; - if(inputStream != null) - inputStream.close(); - inputStream = null; - } - - @Test - public void detectPathFromTagTest(){ - loadXml("author1genre1"); - String tag = "library"; - - String[] response = XmlImportUtilitiesStub.detectPathFromTag(inputStream, tag); - Assert.assertNotNull(response); - Assert.assertEquals(response.length, 1); - Assert.assertEquals(response[0], "library"); - } - - @Test - public void detectPathFromTagWithNestedElement(){ - loadXml("author1genre1"); - String tag = "book"; - String[] response = XmlImportUtilitiesStub.detectPathFromTag(inputStream, tag); - Assert.assertNotNull(response); - Assert.assertEquals(response.length, 2); - Assert.assertEquals(response[0], "library"); - Assert.assertEquals(response[1], "book"); - } - - @Test - public void detectRecordElementTest(){ - loadXml("author1genre1"); - createParser(); - String tag="library"; - - List response = new ArrayList(); - try { - response = 
SUT.detectRecordElementWrapper(parser, tag); - } catch (XMLStreamException e) { - Assert.fail(); - } - Assert.assertNotNull(response); - Assert.assertEquals(response.size(), 1); - Assert.assertEquals(response.get(0), "library"); - } - - @Test - public void detectRecordElementCanHandleWithNestedElements(){ - loadXml("author1genre1"); - createParser(); - String tag="book"; - - List response = new ArrayList(); - try { - response = SUT.detectRecordElementWrapper(parser, tag); - } catch (XMLStreamException e) { - Assert.fail(); - } - Assert.assertNotNull(response); - Assert.assertEquals(response.size(), 2); - Assert.assertEquals(response.get(0), "library"); - Assert.assertEquals(response.get(1), "book"); - } - - @Test - public void detectRecordElementIsNullForUnfoundTag(){ - loadXml("author1genre1"); - createParser(); - String tag=""; - - List response = new ArrayList(); - try { - response = SUT.detectRecordElementWrapper(parser, tag); - } catch (XMLStreamException e) { - Assert.fail(); - } - Assert.assertNull(response); - } - - @Test - public void detectRecordElementRegressionTest(){ - loadSampleXml(); - - String[] path = XmlImportUtilitiesStub.detectRecordElement(inputStream); - Assert.assertNotNull(path); - Assert.assertEquals(path.length, 2); - Assert.assertEquals(path[0], "library"); - Assert.assertEquals(path[1], "book"); - } - - @Test - public void importXmlTest(){ - loadSampleXml(); - - String[] recordPath = new String[]{"library","book"}; - XmlImportUtilitiesStub.importXml(inputStream, project, recordPath, columnGroup ); - - log(project); - assertProjectCreated(project, 0, 6); - - Assert.assertEquals(project.rows.get(0).cells.size(), 4); - - Assert.assertEquals(columnGroup.subgroups.size(), 1); - Assert.assertNotNull(columnGroup.subgroups.get("book")); - Assert.assertEquals(columnGroup.subgroups.get("book").subgroups.size(), 3); - Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("author")); - 
Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("title")); - Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("publish_date")); - } - - @Test - public void importXmlWithVaryingStructureTest(){ - loadXml(XmlImporterTests.getSampleWithVaryingStructure()); - - String[] recordPath = new String[]{"library", "book"}; - XmlImportUtilitiesStub.importXml(inputStream, project, recordPath, columnGroup); - - log(project); - assertProjectCreated(project, 0, 6); - Assert.assertEquals(project.rows.get(0).cells.size(), 4); - Assert.assertEquals(project.rows.get(5).cells.size(), 5); - - Assert.assertEquals(columnGroup.subgroups.size(), 1); - Assert.assertEquals(columnGroup.name, ""); - ImportColumnGroup book = columnGroup.subgroups.get("book"); - Assert.assertNotNull(book); - Assert.assertEquals(book.columns.size(), 1); - Assert.assertEquals(book.subgroups.size(), 4); - Assert.assertNotNull(book.subgroups.get("author")); - Assert.assertEquals(book.subgroups.get("author").columns.size(), 1); - Assert.assertNotNull(book.subgroups.get("title")); - Assert.assertNotNull(book.subgroups.get("publish_date")); - Assert.assertNotNull(book.subgroups.get("genre")); - } - - @Test - public void createColumnsFromImportTest(){ - - ImportColumnGroup columnGroup = new ImportColumnGroup(); - ImportColumnGroup subGroup = new ImportColumnGroup(); - columnGroup.columns.put("a", new ImportColumn("hello")); - columnGroup.columns.put("b", new ImportColumn("world")); - subGroup.columns.put("c", new ImportColumn("foo")); - subGroup.columns.put("d", new ImportColumn("bar")); - columnGroup.subgroups.put("e", subGroup); - - XmlImportUtilitiesStub.createColumnsFromImport(project, columnGroup); - log(project); - assertProjectCreated(project, 4, 0); - Assert.assertEquals(project.columnModel.columns.get(0).getName(), "world"); - Assert.assertEquals(project.columnModel.columns.get(1).getName(), "hello"); - Assert.assertEquals(project.columnModel.columns.get(2).getName(), 
"bar"); - Assert.assertEquals(project.columnModel.columns.get(3).getName(), "foo"); - Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 2); - Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 2); - Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan, 2); - } - - @Test - public void findRecordTest(){ - loadSampleXml(); - createParser(); - ParserSkip(); - - String[] recordPath = new String[]{"library","book"}; - int pathIndex = 0; - - try { - SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup); - } catch (XMLStreamException e) { - Assert.fail(); - } - - log(project); - assertProjectCreated(project, 0, 6); - - Assert.assertEquals(project.rows.get(0).cells.size(), 4); - //TODO - } - - @Test - public void processRecordTest(){ - loadXml("author1genre1"); - createParser(); - ParserSkip(); - - try { - SUT.processRecordWrapper(project, parser, columnGroup); - } catch (XMLStreamException e) { - Assert.fail(); - } - log(project); - Assert.assertNotNull(project.rows); - Assert.assertEquals(project.rows.size(), 1); - Row row = project.rows.get(0); - Assert.assertNotNull(row); - Assert.assertNotNull(row.getCell(1)); - Assert.assertEquals(row.getCell(1).value, "author1"); - - } - - @Test - public void processRecordTestDuplicateColumns(){ - loadXml("author1author2genre1"); - createParser(); - ParserSkip(); - - try { - SUT.processRecordWrapper(project, parser, columnGroup); - } catch (XMLStreamException e) { - Assert.fail(); - } - log(project); - Assert.assertNotNull(project.rows); - Assert.assertEquals(project.rows.size(), 2); - Row row = project.rows.get(0); - Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(), 3); - Assert.assertNotNull(row.getCell(1)); - Assert.assertEquals(row.getCell(1).value, "author1"); - row = project.rows.get(1); - Assert.assertEquals(row.getCell(1).value, "author2"); - } - - @Test - public void processRecordTestNestedElement(){ - 
loadXml("author1a dategenre1"); - createParser(); - ParserSkip(); - - try { - SUT.processRecordWrapper(project, parser, columnGroup); - } catch (XMLStreamException e) { - Assert.fail(); - } - log(project); - Assert.assertNotNull(project.rows); - Assert.assertEquals(project.rows.size(), 1); - Row row = project.rows.get(0); - Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(), 4); - Assert.assertNotNull(row.getCell(1)); - Assert.assertEquals(row.getCell(1).value, "author1"); - Assert.assertNotNull(row.getCell(2)); - Assert.assertEquals(row.getCell(2).value, "a date"); - } - - - @Test - public void processSubRecordTest(){ - loadXml("author1genre1"); - createParser(); - ParserSkip(); - - try { - SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record); - } catch (XMLStreamException e) { - Assert.fail(); - } - log(project); - - Assert.assertEquals(columnGroup.subgroups.size(), 1); - Assert.assertEquals(columnGroup.name, ""); - - Assert.assertNotNull(columnGroup.subgroups.get("library")); - Assert.assertEquals(columnGroup.subgroups.get("library").subgroups.size(), 1); - - ImportColumnGroup book = columnGroup.subgroups.get("library").subgroups.get("book"); - Assert.assertNotNull(book); - Assert.assertEquals(book.subgroups.size(), 2); - Assert.assertNotNull(book.subgroups.get("author")); - Assert.assertNotNull(book.subgroups.get("genre")); - - //TODO check record - } - - @Test - public void addCellTest(){ - String columnLocalName = "author"; - String text = "Author1, The"; - int commonStartingRowIndex = 0; - SUT.addCellWrapper(project, columnGroup, record, columnLocalName, text, commonStartingRowIndex); - - Assert.assertNotNull(record); - Assert.assertNotNull(record.rows); - //Assert.assertNotNull(record.columnEmptyRowIndices); - Assert.assertEquals(record.rows.size(), 1); - //Assert.assertEquals(record.columnEmptyRowIndices.size(), 2); - Assert.assertNotNull(record.rows.get(0)); - //Assert.assertNotNull(record.columnEmptyRowIndices.get(0)); - 
//Assert.assertNotNull(record.columnEmptyRowIndices.get(1)); - Assert.assertEquals(record.rows.get(0).size(), 2); - Assert.assertNotNull(record.rows.get(0).get(0)); - Assert.assertEquals(record.rows.get(0).get(0).value, "Author1, The"); - //Assert.assertEquals(record.columnEmptyRowIndices.get(0).intValue(),0); - //Assert.assertEquals(record.columnEmptyRowIndices.get(1).intValue(),1); - - } - - //----------------helpers------------- - public void loadSampleXml(){ - loadXml( XmlImporterTests.getSample() ); - } - - public void loadXml(String xml){ - try { - inputStream = new ByteArrayInputStream( xml.getBytes( "UTF-8" ) ); - } catch (UnsupportedEncodingException e1) { - Assert.fail(); - } - } - - public void ParserSkip(){ - try { - parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event - } catch (XMLStreamException e1) { - Assert.fail(); - } - } - - public void createParser(){ - try { - parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); - } catch (XMLStreamException e1) { - Assert.fail(); - } catch (FactoryConfigurationError e1) { - Assert.fail(); - } - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/importers/XmlImporterTests.java b/main/tests/server/src/com/google/gridworks/tests/importers/XmlImporterTests.java deleted file mode 100644 index d3e6a4f0d..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/importers/XmlImporterTests.java +++ /dev/null @@ -1,234 +0,0 @@ -package com.google.gridworks.tests.importers; - -import static org.mockito.Mockito.mock; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.Properties; - -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import com.google.gridworks.ProjectMetadata; -import 
com.google.gridworks.importers.XmlImporter; -import com.google.gridworks.model.Project; -import com.google.gridworks.model.Row; -import com.google.gridworks.tests.GridworksTest; - - -public class XmlImporterTests extends GridworksTest { - - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - //dependencies - Project project = null; - Properties options = null; - ByteArrayInputStream inputStream = null; - - //System Under Test - XmlImporter SUT = null; - - - @BeforeMethod - public void SetUp(){ - SUT = new XmlImporter(); - project = new Project(); - options = mock(Properties.class); - } - - @AfterMethod - public void TearDown() throws IOException{ - SUT = null; - project = null; - options = null; - if (inputStream != null) inputStream.close(); - inputStream = null; - } - - @Test - public void canParseSample(){ - RunTest(getSample()); - - log(project); - assertProjectCreated(project, 4, 6); - - Row row = project.rows.get(0); - Assert.assertNotNull(row); - Assert.assertNotNull(row.getCell(1)); - Assert.assertEquals(row.getCell(1).value, "Author 1, The"); - } - - @Test - public void canParseSampleWithDuplicateNestedElements(){ - RunTest(getSampleWithDuplicateNestedElements()); - - log(project); - assertProjectCreated(project, 4, 12); - - Row row = project.rows.get(0); - Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(), 4); - Assert.assertNotNull(row.getCell(2)); - Assert.assertEquals(row.getCell(1).value, "Author 1, The"); - Assert.assertEquals(project.rows.get(1).getCell(1).value, "Author 1, Another"); - } - - @Test - public void testCanParseLineBreak(){ - - RunTest(getSampleWithLineBreak()); - - log(project); - assertProjectCreated(project, 4, 6); - - Row row = project.rows.get(3); - Assert.assertNotNull(row); - Assert.assertEquals(row.cells.size(), 4); - Assert.assertNotNull(row.getCell(1)); - Assert.assertEquals(row.getCell(1).value, "With line\n break"); - } - - @Test - public void 
testElementsWithVaryingStructure(){ - RunTest(getSampleWithVaryingStructure()); - - log(project); - assertProjectCreated(project, 5, 6); - - Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "book - genre"); - - Row row0 = project.rows.get(0); - Assert.assertNotNull(row0); - Assert.assertEquals(row0.cells.size(),4); - - Row row5 = project.rows.get(5); - Assert.assertNotNull(row5); - Assert.assertEquals(row5.cells.size(),5); - } - - @Test - public void testElementWithNestedTree(){ - RunTest(getSampleWithTreeStructure()); - log(project); - assertProjectCreated(project, 5, 6); - - Assert.assertEquals(project.columnModel.columnGroups.size(),1); - Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 2); - Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 2); - Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup); - Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(),0); - Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan,2); - } - - //------------helper methods--------------- - - public static String getTypicalElement(int id){ - return "" + - "Author " + id + ", The" + - "Book title " + id + "" + - "2010-05-26" + - ""; - } - - public static String getElementWithDuplicateSubElement(int id){ - return "" + - "Author " + id + ", The" + - "Author " + id + ", Another" + - "Book title " + id + "" + - "2010-05-26" + - ""; - } - - public static String getSample(){ - StringBuilder sb = new StringBuilder(); - sb.append(""); - for(int i = 1; i < 7; i++){ - sb.append(getTypicalElement(i)); - } - sb.append(""); - return sb.toString(); - } - - public static String getSampleWithDuplicateNestedElements(){ - StringBuilder sb = new StringBuilder(); - sb.append(""); - for(int i = 1; i < 7; i++){ - sb.append(getElementWithDuplicateSubElement(i)); - } - sb.append(""); - return sb.toString(); - - } - - public static String getSampleWithLineBreak(){ - 
StringBuilder sb = new StringBuilder(); - sb.append(""); - for(int i = 1; i < 4; i++){ - sb.append(getTypicalElement(i)); - } - sb.append("" + - "With line\n break" + - "Book title 4" + - "2010-05-26" + - ""); - sb.append(getTypicalElement(5)); - sb.append(getTypicalElement(6)); - sb.append(""); - return sb.toString(); - } - - public static String getSampleWithVaryingStructure(){ - StringBuilder sb = new StringBuilder(); - sb.append(""); - for(int i = 1; i < 6; i++){ - sb.append(getTypicalElement(i)); - } - sb.append("" + - "Author 6, The" + - "Book title 6" + - "New element not seen in other records" + - "2010-05-26" + - ""); - sb.append(""); - return sb.toString(); - } - - public static String getSampleWithTreeStructure(){ - StringBuilder sb = new StringBuilder(); - sb.append(""); - for(int i = 1; i < 7; i++){ - sb.append("" + - "Author " + i + ", The" + - "1950-0" + i + "-15" + - "Book title " + i + "" + - "2010-05-26" + - ""); - } - sb.append(""); - return sb.toString(); - } - - private void RunTest(String testString){ - try { - inputStream = new ByteArrayInputStream( testString.getBytes( "UTF-8" ) ); - } catch (UnsupportedEncodingException e1) { - Assert.fail(); - } - - try { - SUT.read(inputStream, project, new ProjectMetadata(), options); - } catch (Exception e) { - Assert.fail(); - } - } - - -} diff --git a/main/tests/server/src/com/google/gridworks/tests/model/ProjectStub.java b/main/tests/server/src/com/google/gridworks/tests/model/ProjectStub.java deleted file mode 100644 index 9abaf8154..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/model/ProjectStub.java +++ /dev/null @@ -1,10 +0,0 @@ -package com.google.gridworks.tests.model; - -import com.google.gridworks.model.Project; - - -public class ProjectStub extends Project { - public ProjectStub(long id){ - super(id); - } -} diff --git a/main/tests/server/src/com/google/gridworks/tests/util/ParsingUtilitiesTests.java 
b/main/tests/server/src/com/google/gridworks/tests/util/ParsingUtilitiesTests.java deleted file mode 100644 index 5e7eb2bd0..000000000 --- a/main/tests/server/src/com/google/gridworks/tests/util/ParsingUtilitiesTests.java +++ /dev/null @@ -1,54 +0,0 @@ -package com.google.gridworks.tests.util; - -import org.json.JSONException; -import org.json.JSONObject; -import org.slf4j.LoggerFactory; -import org.testng.Assert; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - -import com.google.gridworks.tests.GridworksTest; -import com.google.gridworks.util.ParsingUtilities; - -public class ParsingUtilitiesTests extends GridworksTest { - - @BeforeTest - public void init() { - logger = LoggerFactory.getLogger(this.getClass()); - } - - //--------------evaluateJsonStringToObject tests----------------------- - - @Test - public void evaluateJsonStringToObjectRegressionTest(){ - try { - JSONObject o = ParsingUtilities.evaluateJsonStringToObject("{\"foo\":\"bar\"}"); - Assert.assertNotNull(o); - Assert.assertEquals("bar", o.getString("foo")); - } catch (JSONException e) { - Assert.fail(); - } - } - - @Test - public void evaluateJsonStringToObjectWithNullParameters(){ - try { - Assert.assertNull(ParsingUtilities.evaluateJsonStringToObject(null)); - Assert.fail(); - } catch (IllegalArgumentException e){ - //expected - } catch (JSONException e) { - Assert.fail(); - } - } - - @Test - public void evaluateJsonStringToObjectWithMalformedParameters(){ - try { - ParsingUtilities.evaluateJsonStringToObject("malformed"); - Assert.fail(); - } catch (JSONException e) { - //expected - } - } -} diff --git a/main/tests/server/src/com/google/refine/tests/GridworksServletStub.java b/main/tests/server/src/com/google/refine/tests/GridworksServletStub.java new file mode 100644 index 000000000..39f2b006f --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/GridworksServletStub.java @@ -0,0 +1,46 @@ +package com.google.refine.tests; + +import 
java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.google.refine.GridworksServlet; +import com.google.refine.commands.Command; + +/** + * Exposes protected methods of com.google.refine.GridworksServlet as public for unit testing + * + */ +public class GridworksServletStub extends GridworksServlet { + + //requirement of extending HttpServlet, not required for testing + private static final long serialVersionUID = 1L; + + public void wrapService(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException{ + super.service(request, response); + } + + public String wrapGetCommandName(HttpServletRequest request){ + return super.getCommandKey(request); + } + + //-------------------helper methods-------------- + /** + * Helper method for inserting a mock object + * @param commandName + * @param command + */ + public void insertCommand(String commandName, Command command ){ + registerOneCommand("core/" + commandName, command); + } + + /** + * Helper method for clearing up after testing + * @param commandName + */ + public void removeCommand( String commandName ){ + unregisterCommand(commandName); + } +} diff --git a/main/tests/server/src/com/google/refine/tests/GridworksServletTests.java b/main/tests/server/src/com/google/refine/tests/GridworksServletTests.java new file mode 100644 index 000000000..41b406341 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/GridworksServletTests.java @@ -0,0 +1,186 @@ +package com.google.refine.tests; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.slf4j.LoggerFactory; 
+import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.commands.Command; + +public class GridworksServletTests extends GridworksTest { + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + //System under test + GridworksServletStub SUT = null; + + //variables + final static private String TEST_COMMAND_NAME = "test-command"; + final static private String TEST_COMMAND_PATH = "/command/core/test-command/foobar"; + final static private String BAD_COMMAND_PATH = "/command-does-not-exist"; + + final static private String POST = "POST"; + final static private String GET = "GET"; + + // mocks + HttpServletRequest request = null; + HttpServletResponse response = null; + Command command = null; + + + @BeforeMethod + public void SetUp() throws ServletException { + request = mock(HttpServletRequest.class); + response = mock(HttpServletResponse.class); + command = mock(Command.class); + + SUT = new GridworksServletStub(); + SUT.insertCommand(TEST_COMMAND_NAME,command); //inject mock into command container + } + + @AfterMethod + public void TearDown() { + SUT.removeCommand(TEST_COMMAND_NAME); //remove mock to clean command container + SUT = null; + + request = null; + response = null; + command = null; + } + + //-------------------AutoSaveTimerTask tests----------- + //TODO would need to mock Timer and inject it into GridworksServlet. Also need to deal with ProjectManager.singleton + //-------------------init tests------------------------ + //TODO need to stub super.init(), mock Timer and inject it into GridworksServlet + //-------------------destroy tests--------------------- + //TODO need to mock Timer and inject it into GridworksServlet. 
Also need to deal with ProjectManager.singleton + + //--------------------doGet tests---------------------- + @Test + public void doGetRegressionTest(){ + whenGetCommandNameThenReturn(TEST_COMMAND_PATH); + whenGetMethodThenReturn(GET); + + try { + SUT.wrapService(request, response); + } catch (ServletException e) { + Assert.fail(); + } catch (IOException e) { + Assert.fail(); + } + + verifyGetCommandNameCalled(); + try { + verify(command,times(1)).doGet(request, response); + } catch (ServletException e) { + Assert.fail(); + } catch (IOException e) { + Assert.fail(); + } + } + + @Test + public void doGetReturnsError404WhenCommandNotFound(){ + whenGetCommandNameThenReturn(BAD_COMMAND_PATH); + whenGetMethodThenReturn(GET); + + try { + SUT.wrapService(request, response); + } catch (ServletException e) { + Assert.fail(); + } catch (IOException e) { + Assert.fail(); + } + + verifyGetCommandNameCalled(); + verifyError404Called(); + + } + + //----------------doPost tests------------------------- + @Test + public void doPostRegressionTest(){ + whenGetCommandNameThenReturn(TEST_COMMAND_PATH); + whenGetMethodThenReturn(POST); + + try { + SUT.wrapService(request, response); + } catch (ServletException e) { + Assert.fail(); + } catch (IOException e) { + Assert.fail(); + } + + verifyGetCommandNameCalled(); + try { + verify(command,times(1)).doPost(request, response); + } catch (ServletException e) { + Assert.fail(); + } catch (IOException e) { + Assert.fail(); + } + } + + @Test + public void doPostReturns404WhenCommandNotFound(){ + whenGetCommandNameThenReturn(BAD_COMMAND_PATH); + whenGetMethodThenReturn(POST); + + try { + SUT.wrapService(request, response); + } catch (ServletException e) { + Assert.fail(); + } catch (IOException e) { + Assert.fail(); + } + + verifyGetCommandNameCalled(); + verifyError404Called(); + } + + //----------------getCommandName tests---------------- + + @Test + public void getCommandNameHandlesBadCommandName(){ + + 
when(request.getPathInfo()).thenReturn("/command/this-command-has-no-trailing-slash"); + + Assert.assertEquals("this-command-has-no-trailing-slash", SUT.wrapGetCommandName(request)); + + verify(request, times(1)).getPathInfo(); + } + + //------------helpers + protected void whenGetCommandNameThenReturn(String commandName){ + when(request.getPathInfo()).thenReturn(commandName); + } + protected void whenGetMethodThenReturn(String method){ + when(request.getMethod()).thenReturn(method); + } + protected void verifyGetCommandNameCalled(){ + verify(request,times(2)).getPathInfo(); + } + protected void verifyError404Called(){ + try { + verify(response,times(1)).sendError(404); + } catch (IOException e) { + Assert.fail(); + } + } +} diff --git a/main/tests/server/src/com/google/refine/tests/GridworksTest.java b/main/tests/server/src/com/google/refine/tests/GridworksTest.java new file mode 100644 index 000000000..3f80e40ae --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/GridworksTest.java @@ -0,0 +1,52 @@ +package com.google.refine.tests; + +import org.slf4j.Logger; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; + +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.Project; +import com.google.refine.model.Row; + +public class GridworksTest { + + protected Logger logger; + + @BeforeSuite + public void init() { + System.setProperty("log4j.configuration", "tests.log4j.properties"); + } + + public static void assertProjectCreated(Project project, int numCols, int numRows) { + Assert.assertNotNull(project); + Assert.assertNotNull(project.columnModel); + Assert.assertNotNull(project.columnModel.columns); + Assert.assertEquals(project.columnModel.columns.size(), numCols); + Assert.assertNotNull(project.rows); + Assert.assertEquals(project.rows.size(), numRows); + } + + public void log(Project project) { + // some quick and dirty debugging + StringBuilder sb = new StringBuilder(); + 
for(Column c : project.columnModel.columns){ + sb.append(c.getName()); + sb.append("; "); + } + logger.info(sb.toString()); + for(Row r : project.rows){ + sb = new StringBuilder(); + for(int i = 0; i < r.cells.size(); i++){ + Cell c = r.getCell(i); + if(c != null){ + sb.append(c.value); + sb.append("; "); + }else{ + sb.append("null; "); + } + } + logger.info(sb.toString()); + } + } +} diff --git a/main/tests/server/src/com/google/refine/tests/ProjectManagerStub.java b/main/tests/server/src/com/google/refine/tests/ProjectManagerStub.java new file mode 100644 index 000000000..3c1104fef --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/ProjectManagerStub.java @@ -0,0 +1,79 @@ +package com.google.refine.tests; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.tools.tar.TarOutputStream; + +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.history.HistoryEntryManager; +import com.google.refine.model.Project; + +/** + * Stub used to make protected methods public for testing + * + */ +public class ProjectManagerStub extends ProjectManager { + + public ProjectManagerStub(){ + super(); + } + + @Override + public void deleteProject(long projectID) { + // empty + + } + + @Override + public void exportProject(long projectId, TarOutputStream tos) throws IOException { + // empty + } + + @Override + public HistoryEntryManager getHistoryEntryManager() { + // empty + return null; + } + + @Override + public void importProject(long projectID, InputStream inputStream, boolean gziped) throws IOException { + // empty + } + + @Override + protected Project loadProject(long id) { + // empty + return null; + } + + @Override + public boolean loadProjectMetadata(long projectID) { + // empty + return false; + } + + @Override + public void saveMetadata(ProjectMetadata metadata, long projectId) throws Exception { + // empty + + } + + @Override + public void saveProject(Project project) { + 
// empty + } + + //Overridden to make public for testing + @Override + public void saveProjects(boolean allModified){ + super.saveProjects(allModified); + } + + @Override + protected void saveWorkspace() { + // empty + } + +} diff --git a/main/tests/server/src/com/google/refine/tests/ProjectManagerTests.java b/main/tests/server/src/com/google/refine/tests/ProjectManagerTests.java new file mode 100644 index 000000000..a5ba591c2 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/ProjectManagerTests.java @@ -0,0 +1,212 @@ +package com.google.refine.tests; + +import java.util.Date; +import java.util.GregorianCalendar; + +import org.mockito.Mockito; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.ProjectMetadata; +import com.google.refine.model.Project; +import com.google.refine.tests.model.ProjectStub; + +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.never; + +public class ProjectManagerTests extends GridworksTest { + ProjectManagerStub pm; + ProjectManagerStub SUT; + Project project; + ProjectMetadata metadata; + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + @BeforeMethod + public void SetUp(){ + pm = new ProjectManagerStub(); + SUT = spy(pm); + project = mock(Project.class); + metadata = mock(ProjectMetadata.class); + } + + @AfterMethod + public void TearDown(){ + metadata = null; + project = null; + SUT = null; + pm = null; + } + + @Test + public void canRegisterProject(){ + + SUT.registerProject(project, metadata); + + 
AssertProjectRegistered(); + + verifyNoMoreInteractions(project); + verifyNoMoreInteractions(metadata); + } + + //TODO test registerProject in race condition + + @Test + public void canEnsureProjectSave(){ + whenGetSaveTimes(project, metadata); + registerProject(); + + //run test + SUT.ensureProjectSaved(project.id); + + //assert and verify + AssertProjectRegistered(); + try { + verify(SUT, times(1)).saveMetadata(metadata, project.id); + } catch (Exception e) { + Assert.fail(); + } + this.verifySaveTimeCompared(1); + verify(SUT, times(1)).saveProject(project); + + //ensure end + verifyNoMoreInteractions(project); + verifyNoMoreInteractions(metadata); + } + + //TODO test ensureProjectSave in race condition + + @Test + public void canSaveAllModified(){ + whenGetSaveTimes(project, metadata); //5 minute difference + registerProject(project, metadata); + + //add a second project to the cache + Project project2 = spy(new ProjectStub(2)); + ProjectMetadata metadata2 = mock(ProjectMetadata.class); + whenGetSaveTimes(project2, metadata2, 10); //not modified since the last save but within 30 seconds flush limit + registerProject(project2, metadata2); + + //check that the two projects are not the same + Assert.assertFalse(project.id == project2.id); + + SUT.save(true); + + verifySaved(project, metadata); + + verifySaved(project2, metadata2); + + verify(SUT, times(1)).saveWorkspace(); + } + + @Test + public void canFlushFromCache(){ + + whenGetSaveTimes(project, metadata, -10 );//already saved (10 seconds before) + registerProject(project, metadata); + Assert.assertSame(SUT.getProject(0), project); + + SUT.save(true); + + verify(metadata, times(1)).getModified(); + verify(project, times(2)).getLastSave(); + verify(SUT, never()).saveProject(project); + Assert.assertEquals(SUT.getProject(0), null); + verifyNoMoreInteractions(project); + verifyNoMoreInteractions(metadata); + + verify(SUT, times(1)).saveWorkspace(); + } + + @Test + public void cannotSaveWhenBusy(){ + 
registerProject(); + SUT.setBusy(true); + + SUT.save(false); + + verify(SUT, never()).saveProjects(Mockito.anyBoolean()); + verify(SUT, never()).saveWorkspace(); + verifyNoMoreInteractions(project); + verifyNoMoreInteractions(metadata); + } + + //TODO test canSaveAllModifiedWithRaceCondition + + @Test + public void canSaveSomeModified(){ + registerProject(); + whenGetSaveTimes(project, metadata ); + + SUT.save(false); //not busy + + verifySaved(project, metadata); + verify(SUT, times(1)).saveWorkspace(); + + } + //TODO test canSaveAllModifiedWithRaceCondition + + //-------------helpers------------- + + protected void registerProject(){ + this.registerProject(project, metadata); + } + protected void registerProject(Project proj, ProjectMetadata meta){ + SUT.registerProject(proj, meta); + } + + protected void AssertProjectRegistered(){ + Assert.assertEquals(SUT.getProject(project.id), project); + Assert.assertEquals(SUT.getProjectMetadata(project.id), metadata); + } + + protected void whenGetSaveTimes(Project proj, ProjectMetadata meta){ + whenGetSaveTimes(proj, meta, 5); + } + protected void whenGetSaveTimes(Project proj, ProjectMetadata meta, int secondsDifference){ + whenProjectGetLastSave(proj); + whenMetadataGetModified(meta, secondsDifference); + } + + protected void whenProjectGetLastSave(Project proj){ + Date projectLastSaveDate = new GregorianCalendar(1970,01,02,00,30,00).getTime(); + when(proj.getLastSave()).thenReturn(projectLastSaveDate); + } + + protected void whenMetadataGetModified(ProjectMetadata meta){ + whenMetadataGetModified(meta, 5*60); + } + protected void whenMetadataGetModified(ProjectMetadata meta, int secondsDifference){ + Date metadataModifiedDate = new GregorianCalendar(1970,01,02,00, 30, secondsDifference).getTime(); + when(meta.getModified()).thenReturn(metadataModifiedDate); + } + + protected void verifySaveTimeCompared(int times){ + verifySaveTimeCompared(project, metadata, times); + } + protected void verifySaveTimeCompared(Project 
project, ProjectMetadata metadata, int times){ + verify(metadata, times(times)).getModified(); + verify(project, times(times)).getLastSave(); + } + + protected void verifySaved(Project proj, ProjectMetadata meta){ + verify(meta, times(1)).getModified(); + verify(proj, times(2)).getLastSave(); + verify(SUT, times(1)).saveProject(proj); + + verifyNoMoreInteractions(proj); + verifyNoMoreInteractions(meta); + } +} diff --git a/main/tests/server/src/com/google/refine/tests/commands/CommandStub.java b/main/tests/server/src/com/google/refine/tests/commands/CommandStub.java new file mode 100644 index 000000000..94365821c --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/commands/CommandStub.java @@ -0,0 +1,40 @@ +package com.google.refine.tests.commands; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONException; +import org.json.JSONObject; + +import com.google.refine.browsing.Engine; +import com.google.refine.commands.Command; +import com.google.refine.model.Project; + +/** + * Implementation of abstract class for testing Exposes protected members as public + */ +public class CommandStub extends Command { + + public Project wrapGetProject(HttpServletRequest request) + throws ServletException { + return getProject(request); + } + + public JSONObject wrapGetEngineConfig(HttpServletRequest request) + throws JSONException { + return getEngineConfig(request); + } + + public Engine wrapGetEngine(HttpServletRequest request, Project project) + throws Exception { + return getEngine(request, project); + } + + public int wrapGetIntegerParameter(HttpServletRequest request, String name,int def) { + return getIntegerParameter(request, name, def); + } + + public JSONObject wrapGetJsonParameter(HttpServletRequest request,String name) { + return getJsonParameter(request, name); + } +} diff --git a/main/tests/server/src/com/google/refine/tests/commands/CommandTests.java 
b/main/tests/server/src/com/google/refine/tests/commands/CommandTests.java new file mode 100644 index 000000000..f7eebb891 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/commands/CommandTests.java @@ -0,0 +1,318 @@ +package com.google.refine.tests.commands; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; + +import org.json.JSONException; +import org.json.JSONObject; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.ProjectManager; +import com.google.refine.browsing.Engine; +import com.google.refine.model.Project; +import com.google.refine.tests.GridworksTest; + +public class CommandTests extends GridworksTest { + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + CommandStub SUT = null; + HttpServletRequest request = null; + ProjectManager projectManager = null; + Project project = null; + + @BeforeMethod + public void SetUp() { + SUT = new CommandStub(); + request = mock(HttpServletRequest.class); + projectManager = mock(ProjectManager.class); + project = mock(Project.class); + } + + @AfterMethod + public void TearDown() { + SUT = null; + request = null; + projectManager = null; + project = null; + } + + // -----------------getProject tests------------ + + @Test + public void getProjectThrowsWithNullParameter() { + try { + SUT.wrapGetProject(null); + Assert.fail(); // should throw exception before this + } catch (IllegalArgumentException e) { + // expected + } catch (Exception e) { + Assert.fail(); + } + } + + @Test + public void getProjectThrowsIfResponseHasNoOrBrokenProjectParameter() { + 
when(request.getParameter("project")).thenReturn(""); // null + try { + SUT.wrapGetProject(request); + } catch (ServletException e) { + // expected + } catch (Exception e) { + Assert.fail(); + } + verify(request, times(1)).getParameter("project"); + } + + // -----------------getEngineConfig tests----------------- + @Test + public void getEngineConfigThrowsWithNullParameter() { + try { + SUT.wrapGetEngineConfig(null); + Assert.fail(); + } catch (IllegalArgumentException e) { + // expected + } catch (Exception e) { + Assert.fail(); + } + } + + @Test + public void getEngineConfigReturnsNullWithNullEngineParameter() { + when(request.getParameter("engine")).thenReturn(null); + try { + Assert.assertNull(SUT.wrapGetEngineConfig(request)); + } catch (JSONException e) { + Assert.fail(); + } catch (Exception e) { + Assert.fail(); + } + } + + @Test + public void getEngineConfigReturnsNullWithEmptyOrBadParameterValue() { + when(request.getParameter("engine")).thenReturn("sdfasdfas"); + + try { + Assert.assertNull( SUT.wrapGetEngineConfig(request) ); + } catch (JSONException e) { + Assert.fail(); + } + + verify(request, times(1)).getParameter("engine"); + } + + @Test + public void getEngineConfigRegressionTest() { + when(request.getParameter("engine")).thenReturn("{\"hello\":\"world\"}"); + JSONObject o = null; + try { + o = SUT.wrapGetEngineConfig(request); + Assert.assertEquals("world", o.getString("hello")); + } catch (JSONException e) { + Assert.fail(); + } catch (Exception e) { + Assert.fail(); + } + verify(request, times(1)).getParameter("engine"); + } + + // -----------------getEngine tests---------------------- + @Test + public void getEngineThrowsOnNullParameter() { + try { + SUT.wrapGetEngine(null, null); + } catch (IllegalArgumentException e) { + // expected + } catch (Exception e) { + Assert.fail(); + } + + try { + SUT.wrapGetEngine(null, project); + } catch (IllegalArgumentException e) { + // expected + } catch (Exception e) { + Assert.fail(); + } + + try { + 
SUT.wrapGetEngine(request, null); + } catch (IllegalArgumentException e) { + // expected + } catch (Exception e) { + Assert.fail(); + } + } + + @Test + public void getEngineRegressionTest() { + // TODO refactor getEngine to use dependency injection, so a mock Engine + // object can be used. + + Engine engine = null; + when(request.getParameter("engine")).thenReturn("{\"hello\":\"world\"}"); + + try { + engine = SUT.wrapGetEngine(request, project); + Assert.assertNotNull(engine); + } catch (Exception e) { + Assert.fail(); + } + + verify(request, times(1)).getParameter("engine"); + // JSON configuration doesn't have 'facets' key or 'INCLUDE_DEPENDENT' + // key, so there should be no further action + // Engine._facets is protected so can't test that it is of zero length. + } + + // ------------------ + @Test + public void getIntegerParameterWithNullParameters() { + // all null + try { + SUT.wrapGetIntegerParameter(null, null, 0); + Assert.fail(); + } catch (IllegalArgumentException e) { + // expected + } + + // request null + try { + SUT.wrapGetIntegerParameter(null, "name", 0); + Assert.fail(); + } catch (IllegalArgumentException e) { + // expected + } + } + + @Test + public void getIntegerParametersWithIncorrectParameterName() { + + when(request.getParameter(null)).thenReturn(null); + when(request.getParameter("incorrect")).thenReturn(null); + + // name null + try { + int returned = SUT.wrapGetIntegerParameter(request, null, 5); + Assert.assertEquals(5, returned); + } catch (IllegalArgumentException e) { + Assert.fail(); + } + + // name incorrect + try { + int returned = SUT.wrapGetIntegerParameter(request, "incorrect", 5); + Assert.assertEquals(5, returned); + } catch (IllegalArgumentException e) { + Assert.fail(); + } + + verify(request, times(1)).getParameter(null); + verify(request, times(1)).getParameter("incorrect"); + } + + @Test + public void getIntegerParametersRegressionTest() { + when(request.getParameter("positivenumber")).thenReturn("22"); + 
when(request.getParameter("zeronumber")).thenReturn("0"); + when(request.getParameter("negativenumber")).thenReturn("-40"); + + // positive + try { + int returned = SUT.wrapGetIntegerParameter(request,"positivenumber", 5); + Assert.assertEquals(22, returned); + } catch (IllegalArgumentException e) { + Assert.fail(); + } + + // zero + try { + int returned = SUT.wrapGetIntegerParameter(request, "zeronumber", 5); + Assert.assertEquals(0, returned); + } catch (IllegalArgumentException e) { + Assert.fail(); + } + + // negative + try { + int returned = SUT.wrapGetIntegerParameter(request, + "negativenumber", 5); + Assert.assertEquals(-40, returned); + } catch (IllegalArgumentException e) { + Assert.fail(); + } + + verify(request, times(1)).getParameter("positivenumber"); + verify(request, times(1)).getParameter("zeronumber"); + verify(request, times(1)).getParameter("negativenumber"); + } + + // ---------------------getJsonParameter tests---------------- + @Test + public void getJsonParameterWithNullParameters() { + when(request.getParameter(null)).thenReturn(null); + when(request.getParameter("")).thenReturn(null); + + try { + SUT.wrapGetJsonParameter(null, null); + Assert.fail(); + } catch (IllegalArgumentException e) { + // expected + } + + Assert.assertNull(SUT.wrapGetJsonParameter(request, null)); + + try { + SUT.wrapGetJsonParameter(null, "test"); + } catch (IllegalArgumentException e) { + // expected + } + + Assert.assertNull(SUT.wrapGetJsonParameter(request, "")); + + verify(request, times(1)).getParameter(null); + verify(request, times(1)).getParameter(""); + } + + @Test + public void getJsonParameterRegressionTest() { + when(request.getParameter("test")).thenReturn("{\"foo\":\"bar\"}"); + + JSONObject o = SUT.wrapGetJsonParameter(request, "test"); + Assert.assertNotNull(o); + try { + Assert.assertEquals("bar", o.getString("foo")); + } catch (JSONException e) { + Assert.fail(); + } + + verify(request, times(1)).getParameter("test"); + } + + @Test + public void 
getJsonParameterWithMalformedJson() { + when(request.getParameter("test")).thenReturn("brokenJSON"); + + try { + Assert.assertNull(SUT.wrapGetJsonParameter(request, "test")); + } catch (Exception e) { + Assert.fail(); + } + + verify(request, times(1)).getParameter("test"); + } +} diff --git a/main/tests/server/src/com/google/refine/tests/commands/util/CancelProcessesCommandTests.java b/main/tests/server/src/com/google/refine/tests/commands/util/CancelProcessesCommandTests.java new file mode 100644 index 000000000..b8d4a07da --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/commands/util/CancelProcessesCommandTests.java @@ -0,0 +1,212 @@ +package com.google.refine.tests.commands.util; + +import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.io.PrintWriter; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.ProjectManager; +import com.google.refine.commands.history.CancelProcessesCommand; +import com.google.refine.model.Project; +import com.google.refine.process.ProcessManager; +import com.google.refine.tests.GridworksTest; + +public class CancelProcessesCommandTests extends GridworksTest { + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + // System Under Test + CancelProcessesCommand SUT = null; + + // variables + long PROJECT_ID_LONG = 1234; + String PROJECT_ID = "1234"; + + // mocks + HttpServletRequest request = null; + HttpServletResponse response = null; + 
ProjectManager projMan = null; + Project proj = null; + ProcessManager processMan = null; + PrintWriter pw = null; + + @BeforeMethod + public void SetUp() { + projMan = mock(ProjectManager.class); + ProjectManager.singleton = projMan; + proj = mock(Project.class); + processMan = mock(ProcessManager.class); + pw = mock(PrintWriter.class); + + request = mock(HttpServletRequest.class); + response = mock(HttpServletResponse.class); + SUT = new CancelProcessesCommand(); + } + + @AfterMethod + public void TearDown() { + SUT = null; + + projMan = null; + ProjectManager.singleton = null; + proj = null; + pw = null; + request = null; + response = null; + } + + @Test + public void doPostFailsThrowsWithNullParameters() { + + // both parameters null + try { + SUT.doPost(null, null); + Assert.fail(); // should have thrown exception by this point + } catch (IllegalArgumentException e){ + //expected + } catch (ServletException e) { + Assert.fail(); + } catch (Exception e) { + Assert.fail(); + } + + // request is null + try { + SUT.doPost(null, response); + Assert.fail(); // should have thrown exception by this point + } catch (IllegalArgumentException e){ + //expected + } catch (ServletException e) { + Assert.fail(); + } catch (Exception e) { + Assert.fail(); + } + + // response parameter null + try { + SUT.doPost(request, null); + Assert.fail(); // should have thrown exception by this point + } catch (IllegalArgumentException e){ + // expected + } catch (ServletException e) { + Assert.fail(); + } catch (Exception e) { + Assert.fail(); + } + } + + /** + * Contract for a complete working post + */ + @Test + public void doPostRegressionTest() { + + // mock dependencies + when(request.getParameter("project")).thenReturn(PROJECT_ID); + when(projMan.getProject(anyLong())).thenReturn(proj); + when(proj.getProcessManager()).thenReturn(processMan); + try { + when(response.getWriter()).thenReturn(pw); + } catch (IOException e1) { + Assert.fail(); + } + + // run + try { + 
SUT.doPost(request, response); + } catch (ServletException e) { + Assert.fail(); + } catch (IOException e) { + Assert.fail(); + } + + // verify + verify(request, times(1)).getParameter("project"); + verify(projMan, times(1)).getProject(PROJECT_ID_LONG); + + verify(processMan, times(1)).cancelAll(); + verify(response, times(1)).setCharacterEncoding("UTF-8"); + verify(response, times(1)) + .setHeader("Content-Type", "application/json"); + verify(proj, times(1)).getProcessManager(); + try { + verify(response, times(1)).getWriter(); + } catch (IOException e) { + Assert.fail(); + } + verify(pw, times(1)).write("{ \"code\" : \"ok\" }"); + } + + @Test + public void doPostThrowsIfCommand_getProjectReturnsNull(){ + // mock dependencies + when(request.getParameter("project")).thenReturn(PROJECT_ID); + when(projMan.getProject(anyLong())) + .thenReturn(null); + + // run + try { + SUT.doPost(request, response); + } catch (ServletException e) { + //expected + } catch (IOException e) { + Assert.fail(); + } + + // verify + verify(request, times(1)).getParameter("project"); + verify(projMan, times(1)).getProject(PROJECT_ID_LONG); + } + + @Test + public void doPostCatchesExceptionFromWriter(){ + String ERROR_MESSAGE = "hello world"; + + // mock dependencies + when(request.getParameter("project")).thenReturn(PROJECT_ID); + when(projMan.getProject(anyLong())).thenReturn(proj); + when(proj.getProcessManager()).thenReturn(processMan); + try { + when(response.getWriter()).thenThrow(new IllegalStateException(ERROR_MESSAGE)) + .thenReturn(pw); + } catch (IOException e) { + Assert.fail(); + } + + // run + try { + SUT.doPost(request, response); + } catch (ServletException e) { + Assert.fail(); + } catch (IOException e) { + Assert.fail(); + } + + verify(request, times(1)).getParameter("project"); + verify(projMan, times(1)).getProject(PROJECT_ID_LONG); + + verify(processMan, times(1)).cancelAll(); + verify(response, times(3)).setCharacterEncoding("UTF-8"); + //omitted other verifications for 
brevity. + //assumption is that expecting response.setCharacterEncoding times(3) + //implies it has Command.respondException has been called as expected + } +} diff --git a/main/tests/server/src/com/google/refine/tests/exporters/CsvExporterTests.java b/main/tests/server/src/com/google/refine/tests/exporters/CsvExporterTests.java new file mode 100644 index 000000000..3e3a57b91 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/exporters/CsvExporterTests.java @@ -0,0 +1,186 @@ +package com.google.refine.tests.exporters; + +import java.io.IOException; +import java.io.StringWriter; +import java.util.Properties; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.times; + +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.browsing.Engine; +import com.google.refine.exporters.CsvExporter; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.ModelException; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.tests.GridworksTest; + +public class CsvExporterTests extends GridworksTest { + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + //dependencies + StringWriter writer; + Project project; + Engine engine; + Properties options; + + //System Under Test + CsvExporter SUT; + + @BeforeMethod + public void SetUp(){ + SUT = new CsvExporter(); + writer = new StringWriter(); + project = new Project(); + engine = new Engine(project); + options = mock(Properties.class); + } + + @AfterMethod + public void TearDown(){ + SUT = null; + writer = null; + project = null; + engine = null; + options = null; + } 
+ + @Test + public void exportSimpleCsv(){ + CreateGrid(2, 2); + + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), "column0,column1\n" + + "row0cell0,row0cell1\n" + + "row1cell0,row1cell1\n"); + + } + + @Test + public void exportSimpleCsvNoHeader(){ + CreateGrid(2, 2); + when(options.getProperty("printColumnHeader")).thenReturn("false"); + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), "row0cell0,row0cell1\n" + + "row1cell0,row1cell1\n"); + + verify(options,times(2)).getProperty("printColumnHeader"); + } + + @Test + public void exportCsvWithLineBreaks(){ + CreateGrid(3,3); + + project.rows.get(1).cells.set(1, new Cell("line\n\n\nbreak", null)); + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), "column0,column1,column2\n" + + "row0cell0,row0cell1,row0cell2\n" + + "row1cell0,\"line\n\n\nbreak\",row1cell2\n" + + "row2cell0,row2cell1,row2cell2\n"); + } + + @Test + public void exportCsvWithComma(){ + CreateGrid(3,3); + + project.rows.get(1).cells.set(1, new Cell("with, comma", null)); + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), "column0,column1,column2\n" + + "row0cell0,row0cell1,row0cell2\n" + + "row1cell0,\"with, comma\",row1cell2\n" + + "row2cell0,row2cell1,row2cell2\n"); + } + + @Test + public void exportCsvWithQuote(){ + CreateGrid(3,3); + + project.rows.get(1).cells.set(1, new Cell("line has \"quote\"", null)); + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), "column0,column1,column2\n" + + "row0cell0,row0cell1,row0cell2\n" + + "row1cell0,\"line has 
\"\"quote\"\"\",row1cell2\n" + + "row2cell0,row2cell1,row2cell2\n"); + } + + @Test + public void exportCsvWithEmptyCells(){ + CreateGrid(3,3); + + project.rows.get(1).cells.set(1, null); + project.rows.get(2).cells.set(0, null); + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), "column0,column1,column2\n" + + "row0cell0,row0cell1,row0cell2\n" + + "row1cell0,,row1cell2\n" + + ",row2cell1,row2cell2\n"); + } + + //helper methods + + protected void CreateColumns(int noOfColumns){ + for(int i = 0; i < noOfColumns; i++){ + try { + project.columnModel.addColumn(i, new Column(i, "column" + i), true); + } catch (ModelException e1) { + Assert.fail("Could not create column"); + } + } + } + + protected void CreateGrid(int noOfRows, int noOfColumns){ + CreateColumns(noOfColumns); + + for(int i = 0; i < noOfRows; i++){ + Row row = new Row(noOfColumns); + for(int j = 0; j < noOfColumns; j++){ + row.cells.add(new Cell("row" + i + "cell" + j, null)); + } + project.rows.add(row); + } + } +} diff --git a/main/tests/server/src/com/google/refine/tests/exporters/TsvExporterTests.java b/main/tests/server/src/com/google/refine/tests/exporters/TsvExporterTests.java new file mode 100644 index 000000000..94bc48d93 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/exporters/TsvExporterTests.java @@ -0,0 +1,188 @@ +package com.google.refine.tests.exporters; + +import java.io.IOException; +import java.io.StringWriter; +import java.util.Properties; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import 
com.google.refine.browsing.Engine; +import com.google.refine.exporters.CsvExporter; +import com.google.refine.model.Cell; +import com.google.refine.model.Column; +import com.google.refine.model.ModelException; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.tests.GridworksTest; + +public class TsvExporterTests extends GridworksTest { + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + //dependencies + StringWriter writer; + Project project; + Engine engine; + Properties options; + + //System Under Test + CsvExporter SUT; + + @BeforeMethod + public void SetUp(){ + SUT = new CsvExporter('\t');//new TsvExporter(); + writer = new StringWriter(); + project = new Project(); + engine = new Engine(project); + options = mock(Properties.class); + } + + @AfterMethod + public void TearDown(){ + SUT = null; + writer = null; + project = null; + engine = null; + options = null; + } + + @Test + public void exportSimpleTsv(){ + CreateGrid(2, 2); + + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), "column0\tcolumn1\n" + + "row0cell0\trow0cell1\n" + + "row1cell0\trow1cell1\n"); + + } + + @Test + public void exportSimpleTsvNoHeader(){ + CreateGrid(2, 2); + when(options.getProperty("printColumnHeader")).thenReturn("false"); + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), "row0cell0\trow0cell1\n" + + "row1cell0\trow1cell1\n"); + + verify(options,times(2)).getProperty("printColumnHeader"); + } + + @Test + public void exportTsvWithLineBreaks(){ + CreateGrid(3,3); + + project.rows.get(1).cells.set(1, new Cell("line\n\n\nbreak", null)); + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), 
"column0\tcolumn1\tcolumn2\n" + + "row0cell0\trow0cell1\trow0cell2\n" + + "row1cell0\t\"line\n\n\nbreak\"\trow1cell2\n" + + "row2cell0\trow2cell1\trow2cell2\n"); + } + + @Test + public void exportTsvWithComma(){ + CreateGrid(3,3); + + project.rows.get(1).cells.set(1, new Cell("with\t tab", null)); + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), "column0\tcolumn1\tcolumn2\n" + + "row0cell0\trow0cell1\trow0cell2\n" + + "row1cell0\t\"with\t tab\"\trow1cell2\n" + + "row2cell0\trow2cell1\trow2cell2\n"); + } + + @Test + public void exportTsvWithQuote(){ + CreateGrid(3,3); + + project.rows.get(1).cells.set(1, new Cell("line has \"quote\"", null)); + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), "column0\tcolumn1\tcolumn2\n" + + "row0cell0\trow0cell1\trow0cell2\n" + + "row1cell0\t\"line has \"\"quote\"\"\"\trow1cell2\n" + + "row2cell0\trow2cell1\trow2cell2\n"); + } + + @Test + public void exportTsvWithEmptyCells(){ + CreateGrid(3,3); + + project.rows.get(1).cells.set(1, null); + project.rows.get(2).cells.set(0, null); + try { + SUT.export(project, options, engine, writer); + } catch (IOException e) { + Assert.fail(); + } + + Assert.assertEquals(writer.toString(), "column0\tcolumn1\tcolumn2\n" + + "row0cell0\trow0cell1\trow0cell2\n" + + "row1cell0\t\trow1cell2\n" + + "\trow2cell1\trow2cell2\n"); + } + + //helper methods + + protected void CreateColumns(int noOfColumns){ + for(int i = 0; i < noOfColumns; i++){ + try { + project.columnModel.addColumn(i, new Column(i, "column" + i), true); + project.columnModel.columns.get(i).getCellIndex(); + } catch (ModelException e1) { + Assert.fail("Could not create column"); + } + } + } + + protected void CreateGrid(int noOfRows, int noOfColumns){ + CreateColumns(noOfColumns); + + for(int i = 0; i < noOfRows; i++){ + Row row = new 
Row(noOfColumns); + for(int j = 0; j < noOfColumns; j++){ + row.cells.add(new Cell("row" + i + "cell" + j, null)); + } + project.rows.add(row); + } + } +} + diff --git a/main/tests/server/src/com/google/refine/tests/history/HistoryTests.java b/main/tests/server/src/com/google/refine/tests/history/HistoryTests.java new file mode 100644 index 000000000..3f73542b7 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/history/HistoryTests.java @@ -0,0 +1,68 @@ +package com.google.refine.tests.history; + +import org.mockito.Mockito; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.times; + +import com.google.refine.ProjectManager; +import com.google.refine.ProjectMetadata; +import com.google.refine.history.History; +import com.google.refine.history.HistoryEntry; +import com.google.refine.model.Project; +import com.google.refine.tests.GridworksTest; + + +public class HistoryTests extends GridworksTest { + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + //System Under Test + History SUT; + + //dependencies + Project proj; + ProjectManager projectManager; + + @BeforeMethod + public void SetUp(){ + projectManager = mock(ProjectManager.class); + ProjectManager.singleton = projectManager; + proj = new Project(); + SUT = new History(proj); + } + + @AfterMethod + public void TearDown(){ + SUT = null; + proj = null; + } + + @Test + public void canAddEntry(){ + //local dependencies + HistoryEntry entry = mock(HistoryEntry.class); + Project project = mock(Project.class); + ProjectMetadata projectMetadata = mock(ProjectMetadata.class); + + 
when(projectManager.getProject(Mockito.anyLong())).thenReturn(project); + when(projectManager.getProjectMetadata(Mockito.anyLong())).thenReturn(projectMetadata); + + SUT.addEntry(entry); + + verify(projectManager, times(1)).getProject(Mockito.anyLong()); + verify(entry, times(1)).apply(project); + verify(projectMetadata, times(1)).updateModified(); + Assert.assertEquals(SUT.getLastPastEntries(1).get(0), entry); + } +} diff --git a/main/tests/server/src/com/google/refine/tests/importers/ImporterUtilitiesTests.java b/main/tests/server/src/com/google/refine/tests/importers/ImporterUtilitiesTests.java new file mode 100644 index 000000000..461ecf316 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/importers/ImporterUtilitiesTests.java @@ -0,0 +1,125 @@ +package com.google.refine.tests.importers; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.importers.ImporterUtilities; +import com.google.refine.model.Cell; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.tests.GridworksTest; + +public class ImporterUtilitiesTests extends GridworksTest { + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + @Test(enabled=false) + public void parseCellValueWithText(){ + String END_QUOTES_SHOULD_BE_RETAINED = "\"To be\" is almost always followed by \"or not to be\""; + String response = (String) ImporterUtilities.parseCellValue(END_QUOTES_SHOULD_BE_RETAINED); + Assert.assertEquals(response, END_QUOTES_SHOULD_BE_RETAINED); + } + + @Test + public void getIntegerOption(){ + Properties options = 
mock(Properties.class); + when(options.containsKey("testInteger")).thenReturn(true); + when(options.getProperty("testInteger")).thenReturn("5"); + int response = ImporterUtilities.getIntegerOption("testInteger", options, -1); + Assert.assertEquals(5, response); + verify(options, times(1)).containsKey("testInteger"); + verify(options, times(1)).getProperty("testInteger"); + } + + @Test + public void getIntegerOptionReturnsDefaultOnError(){ + Properties options = mock(Properties.class); + when(options.containsKey("testInteger")).thenReturn(true); + when(options.getProperty("testInteger")).thenReturn("notAnInteger"); + int response = ImporterUtilities.getIntegerOption("testInteger", options, -1); + Assert.assertEquals(-1, response); + verify(options, times(1)).containsKey("testInteger"); + verify(options, times(1)).getProperty("testInteger"); + } + + @Test + public void appendColumnName(){ + List columnNames = new ArrayList(); + + + ImporterUtilities.appendColumnName(columnNames, 0, "foo"); + ImporterUtilities.appendColumnName(columnNames, 1, "bar"); + Assert.assertEquals(columnNames.size(), 2); + Assert.assertEquals(columnNames.get(0), "foo"); + Assert.assertEquals(columnNames.get(1), "bar"); + } + + @Test + public void appendColumnNameFromMultipleRows(){ + List columnNames = new ArrayList(); + + ImporterUtilities.appendColumnName(columnNames, 0, "foo"); + ImporterUtilities.appendColumnName(columnNames, 0, "bar"); + Assert.assertEquals(columnNames.size(), 1); + Assert.assertEquals(columnNames.get(0), "foo bar"); + } + + @Test + public void ensureColumnsInRowExist(){ + String VALUE_1 = "value1"; + String VALUE_2 = "value2"; + Row row = new Row(2); + ArrayList columnNames = new ArrayList(2); + columnNames.add(VALUE_1); + columnNames.add(VALUE_2); + + ImporterUtilities.ensureColumnsInRowExist(columnNames, row); + + Assert.assertEquals(columnNames.size(), 2); + Assert.assertEquals(columnNames.get(0), VALUE_1); + Assert.assertEquals(columnNames.get(1), VALUE_2); + } + + 
@Test + public void ensureColumnsInRowExistDoesExpand(){ + Row row = new Row(4); + for(int i = 1; i < 5; i++) + row.cells.add(new Cell("value" + i, null)); + + ArrayList columnNames = new ArrayList(2); + + + ImporterUtilities.ensureColumnsInRowExist(columnNames, row); + + Assert.assertEquals(row.cells.size(), 4); + Assert.assertEquals(columnNames.size(), 4); + } + + @Test + public void setupColumns(){ + Project project = new Project(); + List columnNames = new ArrayList(); + columnNames.add("col1"); + columnNames.add("col2"); + columnNames.add(""); + ImporterUtilities.setupColumns(project, columnNames); + Assert.assertEquals( project.columnModel.columns.get(0).getName(), "col1" ); + Assert.assertEquals( project.columnModel.columns.get(1).getName(), "col2" ); + Assert.assertEquals( project.columnModel.columns.get(2).getName(), "Column"); + } + +} diff --git a/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java new file mode 100644 index 000000000..ea5dc7f02 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/importers/RdfTripleImporterTests.java @@ -0,0 +1,159 @@ +package com.google.refine.tests.importers; + +import java.io.StringReader; +import java.util.Properties; + +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.ProjectMetadata; +import com.google.refine.importers.RdfTripleImporter; +import com.google.refine.model.Project; +import com.google.refine.tests.GridworksTest; + + +public class RdfTripleImporterTests extends GridworksTest { + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + + //System Under Test + RdfTripleImporter SUT = null; + Project project = null; + Properties options = null; + + @BeforeMethod + public void 
SetUp(){ + SUT = new RdfTripleImporter(); + project = new Project(); + options = new Properties(); + options.put("base-url", "http://rdf.freebase.com"); + } + + @Test(enabled=false) + public void CanParseSingleLineTriple(){ + String sampleRdf = " ."; + StringReader reader = new StringReader(sampleRdf); + + try { + SUT.read(reader, project, new ProjectMetadata(), options); + project.update(); + } catch (Exception e) { + Assert.fail(); + } + + Assert.assertEquals(project.columnModel.columns.size(), 2); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 2); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks"); + } + + @Test + public void CanParseMultiLineTriple(){ + String sampleRdf = " .\n" + + " .\n" + + " ."; + StringReader reader = new StringReader(sampleRdf); + + try { + SUT.read(reader, project, new ProjectMetadata(), options); + project.update(); + } catch (Exception e) { + Assert.fail(); + } + + //columns + Assert.assertEquals(project.columnModel.columns.size(), 2); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album"); + + //rows + Assert.assertEquals(project.rows.size(), 3); + + //row0 + Assert.assertEquals(project.rows.get(0).cells.size(), 2); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "http://rdf.freebase.com/ns/en.blood_on_the_tracks"); + + //row1 + 
Assert.assertEquals(project.rows.get(1).cells.size(), 2); + Assert.assertNull(project.rows.get(1).cells.get(0)); + Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); //NB triples aren't created in order they were input + Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].rowIndex, 0); + Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].cellIndex, 0); + + //row2 + Assert.assertEquals(project.rows.get(2).cells.size(), 2); + Assert.assertNull(project.rows.get(2).cells.get(0)); + Assert.assertEquals(project.rows.get(2).cells.get(1).value, "http://rdf.freebase.com/ns/en.under_the_red_sky"); //NB triples aren't created in order they were input + Assert.assertEquals(project.recordModel.getRowDependency(2).cellDependencies[1].rowIndex, 0); + Assert.assertEquals(project.recordModel.getRowDependency(2).cellDependencies[1].cellIndex, 0); + } + + @Test + public void CanParseMultiLineMultiPredicatesTriple(){ + String sampleRdf = " .\n" + + " .\n" + + " ."; + StringReader reader = new StringReader(sampleRdf); + + try { + SUT.read(reader, project, new ProjectMetadata(), options); + project.update(); + } catch (Exception e) { + Assert.fail(); + } + + //columns + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/music.artist.album"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "http://rdf.freebase.com/ns/music.artist.genre"); + + //rows + Assert.assertEquals(project.rows.size(), 2); + + //row0 + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, 
"http://rdf.freebase.com/ns/en.blood_on_the_tracks"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "http://rdf.freebase.com/ns/en.folk_rock"); + + //row1 + Assert.assertEquals(project.rows.get(1).cells.size(), 2); + Assert.assertNull(project.rows.get(1).cells.get(0)); + Assert.assertEquals(project.rows.get(1).cells.get(1).value, "http://rdf.freebase.com/ns/en.bringing_it_all_back_home"); + Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].rowIndex, 0); + Assert.assertEquals(project.recordModel.getRowDependency(1).cellDependencies[1].cellIndex, 0); + } + + @Test + public void CanParseTripleWithValue(){ + String sampleRdf = " \"Robert Zimmerman\"@en."; + StringReader reader = new StringReader(sampleRdf); + + try { + SUT.read(reader, project, new ProjectMetadata(), options); + project.update(); + } catch (Exception e) { + Assert.fail(); + } + + Assert.assertEquals(project.columnModel.columns.size(), 2); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "subject"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "http://rdf.freebase.com/ns/common.topic.alias"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 2); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "http://rdf.freebase.com/ns/en.bob_dylan"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "\"Robert Zimmerman\"@en"); + } +} diff --git a/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java new file mode 100644 index 000000000..9be4c7b0a --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/importers/TsvCsvImporterTests.java @@ -0,0 +1,549 @@ +package com.google.refine.tests.importers; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static 
org.mockito.Mockito.when; + +import java.io.IOException; +import java.io.LineNumberReader; +import java.io.StringReader; +import java.util.Properties; + +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import com.google.refine.ProjectMetadata; +import com.google.refine.importers.TsvCsvImporter; +import com.google.refine.model.Project; +import com.google.refine.tests.GridworksTest; + +public class TsvCsvImporterTests extends GridworksTest { + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + //constants + String SAMPLE_ROW = "NDB_No,Shrt_Desc,Water"; + + //System Under Test + TsvCsvImporter SUT = null; + + //mock dependencies + Project project = null; + Properties properties = null; + + + @BeforeMethod + public void SetUp(){ + SUT = new TsvCsvImporter(); + project = new Project(); //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity + properties = mock(Properties.class); + } + + @AfterMethod + public void TearDown(){ + SUT = null; + project = null; + properties = null; + } + + @Test(dataProvider = "CSV-or-null") + public void readJustColumns(String sep){ + String input = "col1,col2,col3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + } + + @Test(dataProvider = "CSV-or-null") + public void readUnseperatedData(String 
sep){ + String input = "value1,value2,value3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, false, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 1); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Column"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, input); + } + + @Test(dataProvider = "CSV-or-null") + public void readSimpleData_CSV_1Header_1Row(String sep){ + String input = "col1,col2,col3\n" + + "data1,data2,data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test + public void readSimpleData_TSV_1Header_1Row(){ + String input = "col1\tcol2\tcol3\n" + + "data1\tdata2\tdata3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, "\t", -1, 0, 0, 1, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + 
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(dataProvider = "CSV-or-null") + public void readSimpleData_CSV_1Header_1Row_GuessValues(String sep){ + String input = "col1,col2,col3\n" + + "data1,234,data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 1, true, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertTrue(project.rows.get(0).cells.get(1).value instanceof Long); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, Long.parseLong("234")); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(dataProvider = "CSV-or-null") + public void readSimpleData_0Header_1Row(String sep){ + String input = "data1,data2,data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + 
Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Column"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "Column2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "Column3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(groups = { }, dataProvider = "CSV-or-null") + public void readDoesTrimsLeadingTrailingWhitespace(String sep){ + String input = " data1 , data2 , data3 "; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(dataProvider = "CSV-or-null") + public void readTrimsLeadingTrailingWhitespace(String sep){ + String input = " data1, data2, data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + 
Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(dataProvider = "CSV-or-null") + public void readCanAddNull(String sep){ + String input = " data1, , data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertNull(project.rows.get(0).cells.get(1)); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(dataProvider = "CSV-or-null") + public void readSimpleData_2Header_1Row(String sep){ + String input = "col1,col2,col3\n" + + "sub1,sub2,sub3\n" + + "data1,data2,data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 2, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3 sub3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(dataProvider = "CSV-or-null") + public void readSimpleData_RowLongerThanHeader(String sep){ + String input = "col1,col2,col3\n" + + 
"data1,data2,data3,data4,data5,data6"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 6); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + Assert.assertEquals(project.columnModel.columns.get(3).getName(), "Column"); // columns beyond the header row have no name of their own + Assert.assertEquals(project.columnModel.columns.get(4).getName(), "Column2"); // NOTE(review): assumes the Column, Column2, Column3 sequence asserted in readSimpleData_0Header_1Row - confirm + Assert.assertEquals(project.columnModel.columns.get(5).getName(), "Column3"); // previously index 3 was checked three times, leaving columns 4 and 5 unverified + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 6); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + Assert.assertEquals(project.rows.get(0).cells.get(3).value, "data4"); + Assert.assertEquals(project.rows.get(0).cells.get(4).value, "data5"); + Assert.assertEquals(project.rows.get(0).cells.get(5).value, "data6"); + } + + @Test(groups = { }, dataProvider = "CSV-or-null") + public void readQuotedData(String sep){ + String input = "col1,col2,col3\n" + + "\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\",data2"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); +
Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 2); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "\"To Be\" is often followed by \"or not To Be\""); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + } + + @Test(dataProvider = "CSV-or-null") + public void readIgnoreFirstLine(String sep){ + String input = "ignore1\n" + + "col1,col2,col3\n" + + "data1,data2,data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 1, 1, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(dataProvider = "CSV-or-null") + public void readSkipFirstDataLine(String sep){ + String input = "col1,col2,col3\n" + + "skip1\n" + + "data1,data2,data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 1, 0, 1, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); + 
Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(dataProvider = "CSV-or-null") + public void readIgnore3_Header2_Skip1(String sep){ + String input = "ignore1\n" + + "ignore2\n" + + "ignore3\n" + + "col1,col2,col3\n" + + "sub1,sub2,sub3\n" + + "skip1\n" + + "data1,data2,data3"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 1, 3, 2, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3 sub3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(groups = { }, dataProvider = "CSV-or-null") + public void readIgnore3_Header2_Skip2_limit2(String sep){ + String input = "ignore1\n" + + "ignore2\n" + + "ignore3\n" + + "col1,col2,col3\n" + + "sub1,sub2,sub3\n" + + "skip1\n" + + "skip2\n" + + "data-row1-cell1,data-row1-cell2,data-row1-cell3\n" + + "data-row2-cell1,data-row2-cell2,\n" + //missing last data point of this row on purpose + "data-row3-cell1,data-row3-cell2,data-row1-cell3"; + LineNumberReader lnReader = new LineNumberReader(new 
StringReader(input)); + try { + SUT.read(lnReader, project, sep, 2, 2, 3, 2, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3 sub3"); + Assert.assertEquals(project.rows.size(), 2); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data-row1-cell1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data-row1-cell2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data-row1-cell3"); + Assert.assertEquals(project.rows.get(1).cells.size(), 3); + Assert.assertEquals(project.rows.get(1).cells.get(0).value, "data-row2-cell1"); + Assert.assertEquals(project.rows.get(1).cells.get(1).value, "data-row2-cell2"); + Assert.assertNull(project.rows.get(1).cells.get(2)); + } + + @Test(dataProvider = "CSV-or-null") + public void ignoreQuotes(String sep){ + String input = "data1,data2\",data3,data4"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, true); + } catch (IOException e) { + Assert.fail(); + } + //Assert.assertEquals(project.columnModel.columns.size(), 4); + Assert.assertEquals(project.rows.size(), 1); + //Assert.assertEquals(project.rows.get(0).cells.size(), 4); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals(project.rows.get(0).cells.get(2).value, "data3"); + } + + @Test(groups = { }, dataProvider = "CSV-or-null") + public void readWithMultiLinedQuotedData(String sep){ + String input = "col1,col2,col3\n" + + "\"\"\"To\n Be\"\" is often followed 
by \"\"or not To\n Be\"\"\",data2"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 2); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "\"To\n Be\" is often followed by \"or not To\n Be\""); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + } + + @Test(groups = { }, dataProvider = "CSV-or-null") + public void readWithMultiLinedQuotedDataAndBlankLines(String sep){ + String input = "col1,col2,col3\n" + + "\"A line with many \n\n\n\n\n empty lines\",data2"; + LineNumberReader lnReader = new LineNumberReader(new StringReader(input)); + try { + SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false); + } catch (IOException e) { + Assert.fail(); + } + Assert.assertEquals(project.columnModel.columns.size(), 3); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), "col3"); + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 2); + Assert.assertEquals(project.rows.get(0).cells.get(0).value, "A line with many \n\n\n\n\n empty lines"); + Assert.assertEquals(project.rows.get(0).cells.get(1).value, "data2"); + } + + //---------------------read tests------------------------ + @Test + public void readCsvWithProperties(){ + StringReader reader = new StringReader(SAMPLE_ROW); + 
+ when(properties.getProperty("separator")).thenReturn(","); + whenGetIntegerOption("ignore",properties,0); + whenGetIntegerOption("header-lines",properties,0); + whenGetIntegerOption("limit",properties,-1); + whenGetIntegerOption("skip",properties,0); + whenGetBooleanOption("ignore-quotes",properties,false); // boolean flag: stub it as "false" like the sibling test does, not as the integer "0" + + try { + SUT.read(reader, project, new ProjectMetadata(), properties); + } catch (Exception e) { + Assert.fail(); + } + + + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 3); + Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "NDB_No"); + Assert.assertEquals((String)project.rows.get(0).cells.get(1).value, "Shrt_Desc"); + Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "Water"); + + verify(properties, times(1)).getProperty("separator"); + verifyGetOption("ignore",properties); + verifyGetOption("header-lines",properties); + verifyGetOption("limit",properties); + verifyGetOption("skip",properties); + verifyGetOption("ignore-quotes",properties); + } + + @Test + public void readCsvWithPropertiesIgnoreQuotes(){ + String input = "data1,data2\",data3,data4"; + StringReader reader = new StringReader(input); + + when(properties.getProperty("separator")).thenReturn(","); + whenGetIntegerOption("ignore",properties,0); + whenGetIntegerOption("header-lines",properties,0); + whenGetIntegerOption("limit",properties,-1); + whenGetIntegerOption("skip",properties,0); + whenGetBooleanOption("ignore-quotes",properties,true); + + try { + SUT.read(reader, project, new ProjectMetadata(), properties); + } catch (Exception e) { + Assert.fail(); + } + + + Assert.assertEquals(project.rows.size(), 1); + Assert.assertEquals(project.rows.get(0).cells.size(), 4); + Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "data1"); + Assert.assertEquals((String)project.rows.get(0).cells.get(1).value, "data2"); + Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "data3"); +
Assert.assertEquals((String)project.rows.get(0).cells.get(3).value, "data4"); + + verify(properties, times(1)).getProperty("separator"); + verifyGetOption("ignore",properties); + verifyGetOption("header-lines",properties); + verifyGetOption("limit",properties); + verifyGetOption("skip",properties); + verifyGetOption("ignore-quotes",properties); + } + + //--helpers-- + /** + * Used for parameterized testing for both SeparatorParser and TsvCsvParser: each consuming test runs once with "," and once with a null separator. + */ + @DataProvider(name = "CSV-or-null") + public Object[][] CSV_or_null(){ + return new Object[][]{ // one inner array per invocation, each holding the single sep argument + {","}, + {null} + }; + } + + public void whenGetBooleanOption(String name, Properties properties, Boolean def){ + when(properties.containsKey(name)).thenReturn(true); + when(properties.getProperty(name)).thenReturn(Boolean.toString(def)); + } + + public void whenGetIntegerOption(String name, Properties properties, int def){ + when(properties.containsKey(name)).thenReturn(true); + when(properties.getProperty(name)).thenReturn(Integer.toString(def)); + } + + public void verifyGetOption(String name, Properties properties){ + verify(properties, times(1)).containsKey(name); + verify(properties, times(1)).getProperty(name); + } + +} diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java new file mode 100644 index 000000000..5ad0bc824 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesStub.java @@ -0,0 +1,32 @@ +package com.google.refine.tests.importers; + +import java.util.List; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import com.google.refine.importers.XmlImportUtilities; +import com.google.refine.model.Project; + +public class XmlImportUtilitiesStub extends XmlImportUtilities { + + public List detectRecordElementWrapper(XMLStreamReader parser, String tag) throws XMLStreamException{ + return
super.detectRecordElement(parser, tag); + } + + public void ProcessSubRecordWrapper(Project project, XMLStreamReader parser, ImportColumnGroup columnGroup, ImportRecord record) throws XMLStreamException{ + super.processSubRecord(project, parser, columnGroup, record); + } + + public void findRecordWrapper(Project project, XMLStreamReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws XMLStreamException{ + super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup); + } + + public void processRecordWrapper(Project project, XMLStreamReader parser, ImportColumnGroup rootColumnGroup) throws XMLStreamException{ + super.processRecord(project, parser, rootColumnGroup); + } + + public void addCellWrapper(Project project, ImportColumnGroup columnGroup, ImportRecord record, String columnLocalName, String text, int commonStartingRowIndex) { + super.addCell(project, columnGroup, record, columnLocalName, text); + } +} diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java new file mode 100644 index 000000000..f8ea90464 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImportUtilitiesTests.java @@ -0,0 +1,388 @@ +package com.google.refine.tests.importers; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.List; + +import javax.xml.stream.FactoryConfigurationError; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import 
com.google.refine.importers.XmlImportUtilities.ImportColumn; +import com.google.refine.importers.XmlImportUtilities.ImportColumnGroup; +import com.google.refine.importers.XmlImportUtilities.ImportRecord; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.tests.GridworksTest; + + +public class XmlImportUtilitiesTests extends GridworksTest { + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + //dependencies + Project project; + XMLStreamReader parser; + ImportColumnGroup columnGroup; + ImportRecord record; + ByteArrayInputStream inputStream; + + //System Under Test + XmlImportUtilitiesStub SUT; + + @BeforeMethod + public void SetUp(){ + SUT = new XmlImportUtilitiesStub(); + project = new Project(); + columnGroup = new ImportColumnGroup(); + record = new ImportRecord(); + } + + @AfterMethod + public void TearDown() throws IOException{ + SUT = null; + project = null; + parser = null; + columnGroup = null; + record = null; + if(inputStream != null) + inputStream.close(); + inputStream = null; + } + + @Test + public void detectPathFromTagTest(){ + loadXml("author1genre1"); + String tag = "library"; + + String[] response = XmlImportUtilitiesStub.detectPathFromTag(inputStream, tag); + Assert.assertNotNull(response); + Assert.assertEquals(response.length, 1); + Assert.assertEquals(response[0], "library"); + } + + @Test + public void detectPathFromTagWithNestedElement(){ + loadXml("author1genre1"); + String tag = "book"; + String[] response = XmlImportUtilitiesStub.detectPathFromTag(inputStream, tag); + Assert.assertNotNull(response); + Assert.assertEquals(response.length, 2); + Assert.assertEquals(response[0], "library"); + Assert.assertEquals(response[1], "book"); + } + + @Test + public void detectRecordElementTest(){ + loadXml("author1genre1"); + createParser(); + String tag="library"; + + List response = new ArrayList(); + try { + response = 
SUT.detectRecordElementWrapper(parser, tag); + } catch (XMLStreamException e) { + Assert.fail(); + } + Assert.assertNotNull(response); + Assert.assertEquals(response.size(), 1); + Assert.assertEquals(response.get(0), "library"); + } + + @Test + public void detectRecordElementCanHandleWithNestedElements(){ + loadXml("author1genre1"); + createParser(); + String tag="book"; + + List response = new ArrayList(); + try { + response = SUT.detectRecordElementWrapper(parser, tag); + } catch (XMLStreamException e) { + Assert.fail(); + } + Assert.assertNotNull(response); + Assert.assertEquals(response.size(), 2); + Assert.assertEquals(response.get(0), "library"); + Assert.assertEquals(response.get(1), "book"); + } + + @Test + public void detectRecordElementIsNullForUnfoundTag(){ + loadXml("author1genre1"); + createParser(); + String tag=""; + + List response = new ArrayList(); + try { + response = SUT.detectRecordElementWrapper(parser, tag); + } catch (XMLStreamException e) { + Assert.fail(); + } + Assert.assertNull(response); + } + + @Test + public void detectRecordElementRegressionTest(){ + loadSampleXml(); + + String[] path = XmlImportUtilitiesStub.detectRecordElement(inputStream); + Assert.assertNotNull(path); + Assert.assertEquals(path.length, 2); + Assert.assertEquals(path[0], "library"); + Assert.assertEquals(path[1], "book"); + } + + @Test + public void importXmlTest(){ + loadSampleXml(); + + String[] recordPath = new String[]{"library","book"}; + XmlImportUtilitiesStub.importXml(inputStream, project, recordPath, columnGroup ); + + log(project); + assertProjectCreated(project, 0, 6); + + Assert.assertEquals(project.rows.get(0).cells.size(), 4); + + Assert.assertEquals(columnGroup.subgroups.size(), 1); + Assert.assertNotNull(columnGroup.subgroups.get("book")); + Assert.assertEquals(columnGroup.subgroups.get("book").subgroups.size(), 3); + Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("author")); + 
Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("title")); + Assert.assertNotNull(columnGroup.subgroups.get("book").subgroups.get("publish_date")); + } + + @Test + public void importXmlWithVaryingStructureTest(){ + loadXml(XmlImporterTests.getSampleWithVaryingStructure()); + + String[] recordPath = new String[]{"library", "book"}; + XmlImportUtilitiesStub.importXml(inputStream, project, recordPath, columnGroup); + + log(project); + assertProjectCreated(project, 0, 6); + Assert.assertEquals(project.rows.get(0).cells.size(), 4); + Assert.assertEquals(project.rows.get(5).cells.size(), 5); + + Assert.assertEquals(columnGroup.subgroups.size(), 1); + Assert.assertEquals(columnGroup.name, ""); + ImportColumnGroup book = columnGroup.subgroups.get("book"); + Assert.assertNotNull(book); + Assert.assertEquals(book.columns.size(), 1); + Assert.assertEquals(book.subgroups.size(), 4); + Assert.assertNotNull(book.subgroups.get("author")); + Assert.assertEquals(book.subgroups.get("author").columns.size(), 1); + Assert.assertNotNull(book.subgroups.get("title")); + Assert.assertNotNull(book.subgroups.get("publish_date")); + Assert.assertNotNull(book.subgroups.get("genre")); + } + + @Test + public void createColumnsFromImportTest(){ + + ImportColumnGroup columnGroup = new ImportColumnGroup(); + ImportColumnGroup subGroup = new ImportColumnGroup(); + columnGroup.columns.put("a", new ImportColumn("hello")); + columnGroup.columns.put("b", new ImportColumn("world")); + subGroup.columns.put("c", new ImportColumn("foo")); + subGroup.columns.put("d", new ImportColumn("bar")); + columnGroup.subgroups.put("e", subGroup); + + XmlImportUtilitiesStub.createColumnsFromImport(project, columnGroup); + log(project); + assertProjectCreated(project, 4, 0); + Assert.assertEquals(project.columnModel.columns.get(0).getName(), "world"); + Assert.assertEquals(project.columnModel.columns.get(1).getName(), "hello"); + Assert.assertEquals(project.columnModel.columns.get(2).getName(), 
"bar"); + Assert.assertEquals(project.columnModel.columns.get(3).getName(), "foo"); + Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 2); + Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 2); + Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan, 2); + } + + @Test + public void findRecordTest(){ + loadSampleXml(); + createParser(); + ParserSkip(); + + String[] recordPath = new String[]{"library","book"}; + int pathIndex = 0; + + try { + SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup); + } catch (XMLStreamException e) { + Assert.fail(); + } + + log(project); + assertProjectCreated(project, 0, 6); + + Assert.assertEquals(project.rows.get(0).cells.size(), 4); + //TODO + } + + @Test + public void processRecordTest(){ + loadXml("author1genre1"); + createParser(); + ParserSkip(); + + try { + SUT.processRecordWrapper(project, parser, columnGroup); + } catch (XMLStreamException e) { + Assert.fail(); + } + log(project); + Assert.assertNotNull(project.rows); + Assert.assertEquals(project.rows.size(), 1); + Row row = project.rows.get(0); + Assert.assertNotNull(row); + Assert.assertNotNull(row.getCell(1)); + Assert.assertEquals(row.getCell(1).value, "author1"); + + } + + @Test + public void processRecordTestDuplicateColumns(){ + loadXml("author1author2genre1"); + createParser(); + ParserSkip(); + + try { + SUT.processRecordWrapper(project, parser, columnGroup); + } catch (XMLStreamException e) { + Assert.fail(); + } + log(project); + Assert.assertNotNull(project.rows); + Assert.assertEquals(project.rows.size(), 2); + Row row = project.rows.get(0); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(), 3); + Assert.assertNotNull(row.getCell(1)); + Assert.assertEquals(row.getCell(1).value, "author1"); + row = project.rows.get(1); + Assert.assertEquals(row.getCell(1).value, "author2"); + } + + @Test + public void processRecordTestNestedElement(){ + 
loadXml("author1a dategenre1"); + createParser(); + ParserSkip(); + + try { + SUT.processRecordWrapper(project, parser, columnGroup); + } catch (XMLStreamException e) { + Assert.fail(); + } + log(project); + Assert.assertNotNull(project.rows); + Assert.assertEquals(project.rows.size(), 1); + Row row = project.rows.get(0); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(), 4); + Assert.assertNotNull(row.getCell(1)); + Assert.assertEquals(row.getCell(1).value, "author1"); + Assert.assertNotNull(row.getCell(2)); + Assert.assertEquals(row.getCell(2).value, "a date"); + } + + + @Test + public void processSubRecordTest(){ + loadXml("author1genre1"); + createParser(); + ParserSkip(); + + try { + SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record); + } catch (XMLStreamException e) { + Assert.fail(); + } + log(project); + + Assert.assertEquals(columnGroup.subgroups.size(), 1); + Assert.assertEquals(columnGroup.name, ""); + + Assert.assertNotNull(columnGroup.subgroups.get("library")); + Assert.assertEquals(columnGroup.subgroups.get("library").subgroups.size(), 1); + + ImportColumnGroup book = columnGroup.subgroups.get("library").subgroups.get("book"); + Assert.assertNotNull(book); + Assert.assertEquals(book.subgroups.size(), 2); + Assert.assertNotNull(book.subgroups.get("author")); + Assert.assertNotNull(book.subgroups.get("genre")); + + //TODO check record + } + + @Test + public void addCellTest(){ + String columnLocalName = "author"; + String text = "Author1, The"; + int commonStartingRowIndex = 0; + SUT.addCellWrapper(project, columnGroup, record, columnLocalName, text, commonStartingRowIndex); + + Assert.assertNotNull(record); + Assert.assertNotNull(record.rows); + //Assert.assertNotNull(record.columnEmptyRowIndices); + Assert.assertEquals(record.rows.size(), 1); + //Assert.assertEquals(record.columnEmptyRowIndices.size(), 2); + Assert.assertNotNull(record.rows.get(0)); + //Assert.assertNotNull(record.columnEmptyRowIndices.get(0)); + 
//Assert.assertNotNull(record.columnEmptyRowIndices.get(1)); + Assert.assertEquals(record.rows.get(0).size(), 2); + Assert.assertNotNull(record.rows.get(0).get(0)); + Assert.assertEquals(record.rows.get(0).get(0).value, "Author1, The"); + //Assert.assertEquals(record.columnEmptyRowIndices.get(0).intValue(),0); + //Assert.assertEquals(record.columnEmptyRowIndices.get(1).intValue(),1); + + } + + //----------------helpers------------- + public void loadSampleXml(){ + loadXml( XmlImporterTests.getSample() ); + } + + public void loadXml(String xml){ + try { + inputStream = new ByteArrayInputStream( xml.getBytes( "UTF-8" ) ); + } catch (UnsupportedEncodingException e1) { + Assert.fail(); + } + } + + public void ParserSkip(){ + try { + parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event + } catch (XMLStreamException e1) { + Assert.fail(); + } + } + + public void createParser(){ + try { + parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); + } catch (XMLStreamException e1) { + Assert.fail(); + } catch (FactoryConfigurationError e1) { + Assert.fail(); + } + } +} diff --git a/main/tests/server/src/com/google/refine/tests/importers/XmlImporterTests.java b/main/tests/server/src/com/google/refine/tests/importers/XmlImporterTests.java new file mode 100644 index 000000000..e0c1a29e4 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/importers/XmlImporterTests.java @@ -0,0 +1,234 @@ +package com.google.refine.tests.importers; + +import static org.mockito.Mockito.mock; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.Properties; + +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.ProjectMetadata; +import 
com.google.refine.importers.XmlImporter; +import com.google.refine.model.Project; +import com.google.refine.model.Row; +import com.google.refine.tests.GridworksTest; + + +public class XmlImporterTests extends GridworksTest { + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + //dependencies + Project project = null; + Properties options = null; + ByteArrayInputStream inputStream = null; + + //System Under Test + XmlImporter SUT = null; + + + @BeforeMethod + public void SetUp(){ + SUT = new XmlImporter(); + project = new Project(); + options = mock(Properties.class); + } + + @AfterMethod + public void TearDown() throws IOException{ + SUT = null; + project = null; + options = null; + if (inputStream != null) inputStream.close(); + inputStream = null; + } + + @Test + public void canParseSample(){ + RunTest(getSample()); + + log(project); + assertProjectCreated(project, 4, 6); + + Row row = project.rows.get(0); + Assert.assertNotNull(row); + Assert.assertNotNull(row.getCell(1)); + Assert.assertEquals(row.getCell(1).value, "Author 1, The"); + } + + @Test + public void canParseSampleWithDuplicateNestedElements(){ + RunTest(getSampleWithDuplicateNestedElements()); + + log(project); + assertProjectCreated(project, 4, 12); + + Row row = project.rows.get(0); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(), 4); + Assert.assertNotNull(row.getCell(2)); + Assert.assertEquals(row.getCell(1).value, "Author 1, The"); + Assert.assertEquals(project.rows.get(1).getCell(1).value, "Author 1, Another"); + } + + @Test + public void testCanParseLineBreak(){ + + RunTest(getSampleWithLineBreak()); + + log(project); + assertProjectCreated(project, 4, 6); + + Row row = project.rows.get(3); + Assert.assertNotNull(row); + Assert.assertEquals(row.cells.size(), 4); + Assert.assertNotNull(row.getCell(1)); + Assert.assertEquals(row.getCell(1).value, "With line\n break"); + } + + @Test + public void 
testElementsWithVaryingStructure(){ + RunTest(getSampleWithVaryingStructure()); + + log(project); + assertProjectCreated(project, 5, 6); + + Assert.assertEquals( project.columnModel.getColumnByCellIndex(5).getName(), "book - genre"); + + Row row0 = project.rows.get(0); + Assert.assertNotNull(row0); + Assert.assertEquals(row0.cells.size(),4); + + Row row5 = project.rows.get(5); + Assert.assertNotNull(row5); + Assert.assertEquals(row5.cells.size(),5); + } + + @Test + public void testElementWithNestedTree(){ + RunTest(getSampleWithTreeStructure()); + log(project); + assertProjectCreated(project, 5, 6); + + Assert.assertEquals(project.columnModel.columnGroups.size(),1); + Assert.assertEquals(project.columnModel.columnGroups.get(0).keyColumnIndex, 2); + Assert.assertEquals(project.columnModel.columnGroups.get(0).startColumnIndex, 2); + Assert.assertNull(project.columnModel.columnGroups.get(0).parentGroup); + Assert.assertEquals(project.columnModel.columnGroups.get(0).subgroups.size(),0); + Assert.assertEquals(project.columnModel.columnGroups.get(0).columnSpan,2); + } + + //------------helper methods--------------- + + public static String getTypicalElement(int id){ + return "" + + "Author " + id + ", The" + + "Book title " + id + "" + + "2010-05-26" + + ""; + } + + public static String getElementWithDuplicateSubElement(int id){ + return "" + + "Author " + id + ", The" + + "Author " + id + ", Another" + + "Book title " + id + "" + + "2010-05-26" + + ""; + } + + public static String getSample(){ + StringBuilder sb = new StringBuilder(); + sb.append(""); + for(int i = 1; i < 7; i++){ + sb.append(getTypicalElement(i)); + } + sb.append(""); + return sb.toString(); + } + + public static String getSampleWithDuplicateNestedElements(){ + StringBuilder sb = new StringBuilder(); + sb.append(""); + for(int i = 1; i < 7; i++){ + sb.append(getElementWithDuplicateSubElement(i)); + } + sb.append(""); + return sb.toString(); + + } + + public static String getSampleWithLineBreak(){ + 
StringBuilder sb = new StringBuilder(); + sb.append(""); + for(int i = 1; i < 4; i++){ + sb.append(getTypicalElement(i)); + } + sb.append("" + + "With line\n break" + + "Book title 4" + + "2010-05-26" + + ""); + sb.append(getTypicalElement(5)); + sb.append(getTypicalElement(6)); + sb.append(""); + return sb.toString(); + } + + public static String getSampleWithVaryingStructure(){ + StringBuilder sb = new StringBuilder(); + sb.append(""); + for(int i = 1; i < 6; i++){ + sb.append(getTypicalElement(i)); + } + sb.append("" + + "Author 6, The" + + "Book title 6" + + "New element not seen in other records" + + "2010-05-26" + + ""); + sb.append(""); + return sb.toString(); + } + + public static String getSampleWithTreeStructure(){ + StringBuilder sb = new StringBuilder(); + sb.append(""); + for(int i = 1; i < 7; i++){ + sb.append("" + + "Author " + i + ", The" + + "1950-0" + i + "-15" + + "Book title " + i + "" + + "2010-05-26" + + ""); + } + sb.append(""); + return sb.toString(); + } + + private void RunTest(String testString){ + try { + inputStream = new ByteArrayInputStream( testString.getBytes( "UTF-8" ) ); + } catch (UnsupportedEncodingException e1) { + Assert.fail(); + } + + try { + SUT.read(inputStream, project, new ProjectMetadata(), options); + } catch (Exception e) { + Assert.fail(); + } + } + + +} diff --git a/main/tests/server/src/com/google/refine/tests/model/ProjectStub.java b/main/tests/server/src/com/google/refine/tests/model/ProjectStub.java new file mode 100644 index 000000000..fc7daf129 --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/model/ProjectStub.java @@ -0,0 +1,10 @@ +package com.google.refine.tests.model; + +import com.google.refine.model.Project; + + +public class ProjectStub extends Project { + public ProjectStub(long id){ + super(id); + } +} diff --git a/main/tests/server/src/com/google/refine/tests/util/ParsingUtilitiesTests.java b/main/tests/server/src/com/google/refine/tests/util/ParsingUtilitiesTests.java new file mode 
100644 index 000000000..a20b1cfaf --- /dev/null +++ b/main/tests/server/src/com/google/refine/tests/util/ParsingUtilitiesTests.java @@ -0,0 +1,54 @@ +package com.google.refine.tests.util; + +import org.json.JSONException; +import org.json.JSONObject; +import org.slf4j.LoggerFactory; +import org.testng.Assert; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import com.google.refine.tests.GridworksTest; +import com.google.refine.util.ParsingUtilities; + +public class ParsingUtilitiesTests extends GridworksTest { + + @BeforeTest + public void init() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + //--------------evaluateJsonStringToObject tests----------------------- + + @Test + public void evaluateJsonStringToObjectRegressionTest(){ + try { + JSONObject o = ParsingUtilities.evaluateJsonStringToObject("{\"foo\":\"bar\"}"); + Assert.assertNotNull(o); + Assert.assertEquals("bar", o.getString("foo")); + } catch (JSONException e) { + Assert.fail(); + } + } + + @Test + public void evaluateJsonStringToObjectWithNullParameters(){ + try { + Assert.assertNull(ParsingUtilities.evaluateJsonStringToObject(null)); + Assert.fail(); + } catch (IllegalArgumentException e){ + //expected + } catch (JSONException e) { + Assert.fail(); + } + } + + @Test + public void evaluateJsonStringToObjectWithMalformedParameters(){ + try { + ParsingUtilities.evaluateJsonStringToObject("malformed"); + Assert.fail(); + } catch (JSONException e) { + //expected + } + } +} diff --git a/main/webapp/WEB-INF/web.xml b/main/webapp/WEB-INF/web.xml index 038bde040..2d81222b2 100644 --- a/main/webapp/WEB-INF/web.xml +++ b/main/webapp/WEB-INF/web.xml @@ -20,7 +20,7 @@ gridworks - com.google.gridworks.GridworksServlet + com.google.refine.GridworksServlet diff --git a/main/webapp/modules/core/MOD-INF/controller.js b/main/webapp/modules/core/MOD-INF/controller.js index 97ff6909b..518151a24 100644 --- a/main/webapp/modules/core/MOD-INF/controller.js +++ 
b/main/webapp/modules/core/MOD-INF/controller.js @@ -1,6 +1,6 @@ var html = "text/html"; var encoding = "UTF-8"; -var ClientSideResourceManager = Packages.com.google.gridworks.ClientSideResourceManager; +var ClientSideResourceManager = Packages.com.google.refine.ClientSideResourceManager; var bundle = true; var templatedFiles = { @@ -12,132 +12,132 @@ var templatedFiles = { }; function registerCommands() { - var GS = Packages.com.google.gridworks.GridworksServlet; + var GS = Packages.com.google.refine.GridworksServlet; - GS.registerCommand(module, "create-project-from-upload", new Packages.com.google.gridworks.commands.project.CreateProjectCommand()); - GS.registerCommand(module, "import-project", new Packages.com.google.gridworks.commands.project.ImportProjectCommand()); - GS.registerCommand(module, "export-project", new Packages.com.google.gridworks.commands.project.ExportProjectCommand()); - GS.registerCommand(module, "export-rows", new Packages.com.google.gridworks.commands.project.ExportRowsCommand()); + GS.registerCommand(module, "create-project-from-upload", new Packages.com.google.refine.commands.project.CreateProjectCommand()); + GS.registerCommand(module, "import-project", new Packages.com.google.refine.commands.project.ImportProjectCommand()); + GS.registerCommand(module, "export-project", new Packages.com.google.refine.commands.project.ExportProjectCommand()); + GS.registerCommand(module, "export-rows", new Packages.com.google.refine.commands.project.ExportRowsCommand()); - GS.registerCommand(module, "get-project-metadata", new Packages.com.google.gridworks.commands.project.GetProjectMetadataCommand()); - GS.registerCommand(module, "get-all-project-metadata", new Packages.com.google.gridworks.commands.workspace.GetAllProjectMetadataCommand()); + GS.registerCommand(module, "get-project-metadata", new Packages.com.google.refine.commands.project.GetProjectMetadataCommand()); + GS.registerCommand(module, "get-all-project-metadata", new 
Packages.com.google.refine.commands.workspace.GetAllProjectMetadataCommand()); - GS.registerCommand(module, "delete-project", new Packages.com.google.gridworks.commands.project.DeleteProjectCommand()); - GS.registerCommand(module, "rename-project", new Packages.com.google.gridworks.commands.project.RenameProjectCommand()); + GS.registerCommand(module, "delete-project", new Packages.com.google.refine.commands.project.DeleteProjectCommand()); + GS.registerCommand(module, "rename-project", new Packages.com.google.refine.commands.project.RenameProjectCommand()); - GS.registerCommand(module, "get-models", new Packages.com.google.gridworks.commands.project.GetModelsCommand()); - GS.registerCommand(module, "get-rows", new Packages.com.google.gridworks.commands.row.GetRowsCommand()); - GS.registerCommand(module, "get-processes", new Packages.com.google.gridworks.commands.history.GetProcessesCommand()); - GS.registerCommand(module, "get-history", new Packages.com.google.gridworks.commands.history.GetHistoryCommand()); - GS.registerCommand(module, "get-operations", new Packages.com.google.gridworks.commands.history.GetOperationsCommand()); - GS.registerCommand(module, "get-columns-info", new Packages.com.google.gridworks.commands.column.GetColumnsInfoCommand()); - GS.registerCommand(module, "get-scatterplot", new Packages.com.google.gridworks.commands.browsing.GetScatterplotCommand()); + GS.registerCommand(module, "get-models", new Packages.com.google.refine.commands.project.GetModelsCommand()); + GS.registerCommand(module, "get-rows", new Packages.com.google.refine.commands.row.GetRowsCommand()); + GS.registerCommand(module, "get-processes", new Packages.com.google.refine.commands.history.GetProcessesCommand()); + GS.registerCommand(module, "get-history", new Packages.com.google.refine.commands.history.GetHistoryCommand()); + GS.registerCommand(module, "get-operations", new Packages.com.google.refine.commands.history.GetOperationsCommand()); + GS.registerCommand(module, 
"get-columns-info", new Packages.com.google.refine.commands.column.GetColumnsInfoCommand()); + GS.registerCommand(module, "get-scatterplot", new Packages.com.google.refine.commands.browsing.GetScatterplotCommand()); - GS.registerCommand(module, "undo-redo", new Packages.com.google.gridworks.commands.history.UndoRedoCommand()); - GS.registerCommand(module, "apply-operations", new Packages.com.google.gridworks.commands.history.ApplyOperationsCommand()); - GS.registerCommand(module, "cancel-processes", new Packages.com.google.gridworks.commands.history.CancelProcessesCommand()); + GS.registerCommand(module, "undo-redo", new Packages.com.google.refine.commands.history.UndoRedoCommand()); + GS.registerCommand(module, "apply-operations", new Packages.com.google.refine.commands.history.ApplyOperationsCommand()); + GS.registerCommand(module, "cancel-processes", new Packages.com.google.refine.commands.history.CancelProcessesCommand()); - GS.registerCommand(module, "compute-facets", new Packages.com.google.gridworks.commands.browsing.ComputeFacetsCommand()); - GS.registerCommand(module, "compute-clusters", new Packages.com.google.gridworks.commands.browsing.ComputeClustersCommand()); + GS.registerCommand(module, "compute-facets", new Packages.com.google.refine.commands.browsing.ComputeFacetsCommand()); + GS.registerCommand(module, "compute-clusters", new Packages.com.google.refine.commands.browsing.ComputeClustersCommand()); - GS.registerCommand(module, "edit-one-cell", new Packages.com.google.gridworks.commands.cell.EditOneCellCommand()); - GS.registerCommand(module, "text-transform", new Packages.com.google.gridworks.commands.cell.TextTransformCommand()); - GS.registerCommand(module, "mass-edit", new Packages.com.google.gridworks.commands.cell.MassEditCommand()); - GS.registerCommand(module, "join-multi-value-cells", new Packages.com.google.gridworks.commands.cell.JoinMultiValueCellsCommand()); - GS.registerCommand(module, "split-multi-value-cells", new 
Packages.com.google.gridworks.commands.cell.SplitMultiValueCellsCommand()); - GS.registerCommand(module, "fill-down", new Packages.com.google.gridworks.commands.cell.FillDownCommand()); - GS.registerCommand(module, "blank-down", new Packages.com.google.gridworks.commands.cell.BlankDownCommand()); - GS.registerCommand(module, "transpose-columns-into-rows", new Packages.com.google.gridworks.commands.cell.TransposeColumnsIntoRowsCommand()); - GS.registerCommand(module, "transpose-rows-into-columns", new Packages.com.google.gridworks.commands.cell.TransposeRowsIntoColumnsCommand()); + GS.registerCommand(module, "edit-one-cell", new Packages.com.google.refine.commands.cell.EditOneCellCommand()); + GS.registerCommand(module, "text-transform", new Packages.com.google.refine.commands.cell.TextTransformCommand()); + GS.registerCommand(module, "mass-edit", new Packages.com.google.refine.commands.cell.MassEditCommand()); + GS.registerCommand(module, "join-multi-value-cells", new Packages.com.google.refine.commands.cell.JoinMultiValueCellsCommand()); + GS.registerCommand(module, "split-multi-value-cells", new Packages.com.google.refine.commands.cell.SplitMultiValueCellsCommand()); + GS.registerCommand(module, "fill-down", new Packages.com.google.refine.commands.cell.FillDownCommand()); + GS.registerCommand(module, "blank-down", new Packages.com.google.refine.commands.cell.BlankDownCommand()); + GS.registerCommand(module, "transpose-columns-into-rows", new Packages.com.google.refine.commands.cell.TransposeColumnsIntoRowsCommand()); + GS.registerCommand(module, "transpose-rows-into-columns", new Packages.com.google.refine.commands.cell.TransposeRowsIntoColumnsCommand()); - GS.registerCommand(module, "add-column", new Packages.com.google.gridworks.commands.column.AddColumnCommand()); - GS.registerCommand(module, "remove-column", new Packages.com.google.gridworks.commands.column.RemoveColumnCommand()); - GS.registerCommand(module, "rename-column", new 
Packages.com.google.gridworks.commands.column.RenameColumnCommand()); - GS.registerCommand(module, "move-column", new Packages.com.google.gridworks.commands.column.MoveColumnCommand()); - GS.registerCommand(module, "split-column", new Packages.com.google.gridworks.commands.column.SplitColumnCommand()); - GS.registerCommand(module, "extend-data", new Packages.com.google.gridworks.commands.column.ExtendDataCommand()); - GS.registerCommand(module, "add-column-by-fetching-urls", new Packages.com.google.gridworks.commands.column.AddColumnByFetchingURLsCommand()); - GS.registerCommand(module, "reorder-columns", new Packages.com.google.gridworks.commands.column.ReorderColumnsCommand()); + GS.registerCommand(module, "add-column", new Packages.com.google.refine.commands.column.AddColumnCommand()); + GS.registerCommand(module, "remove-column", new Packages.com.google.refine.commands.column.RemoveColumnCommand()); + GS.registerCommand(module, "rename-column", new Packages.com.google.refine.commands.column.RenameColumnCommand()); + GS.registerCommand(module, "move-column", new Packages.com.google.refine.commands.column.MoveColumnCommand()); + GS.registerCommand(module, "split-column", new Packages.com.google.refine.commands.column.SplitColumnCommand()); + GS.registerCommand(module, "extend-data", new Packages.com.google.refine.commands.column.ExtendDataCommand()); + GS.registerCommand(module, "add-column-by-fetching-urls", new Packages.com.google.refine.commands.column.AddColumnByFetchingURLsCommand()); + GS.registerCommand(module, "reorder-columns", new Packages.com.google.refine.commands.column.ReorderColumnsCommand()); - GS.registerCommand(module, "denormalize", new Packages.com.google.gridworks.commands.row.DenormalizeCommand()); + GS.registerCommand(module, "denormalize", new Packages.com.google.refine.commands.row.DenormalizeCommand()); - GS.registerCommand(module, "reconcile", new Packages.com.google.gridworks.commands.recon.ReconcileCommand()); - 
GS.registerCommand(module, "recon-match-best-candidates", new Packages.com.google.gridworks.commands.recon.ReconMatchBestCandidatesCommand()); - GS.registerCommand(module, "recon-mark-new-topics", new Packages.com.google.gridworks.commands.recon.ReconMarkNewTopicsCommand()); - GS.registerCommand(module, "recon-discard-judgments", new Packages.com.google.gridworks.commands.recon.ReconDiscardJudgmentsCommand()); - GS.registerCommand(module, "recon-match-specific-topic-to-cells", new Packages.com.google.gridworks.commands.recon.ReconMatchSpecificTopicCommand()); - GS.registerCommand(module, "recon-judge-one-cell", new Packages.com.google.gridworks.commands.recon.ReconJudgeOneCellCommand()); - GS.registerCommand(module, "recon-judge-similar-cells", new Packages.com.google.gridworks.commands.recon.ReconJudgeSimilarCellsCommand()); + GS.registerCommand(module, "reconcile", new Packages.com.google.refine.commands.recon.ReconcileCommand()); + GS.registerCommand(module, "recon-match-best-candidates", new Packages.com.google.refine.commands.recon.ReconMatchBestCandidatesCommand()); + GS.registerCommand(module, "recon-mark-new-topics", new Packages.com.google.refine.commands.recon.ReconMarkNewTopicsCommand()); + GS.registerCommand(module, "recon-discard-judgments", new Packages.com.google.refine.commands.recon.ReconDiscardJudgmentsCommand()); + GS.registerCommand(module, "recon-match-specific-topic-to-cells", new Packages.com.google.refine.commands.recon.ReconMatchSpecificTopicCommand()); + GS.registerCommand(module, "recon-judge-one-cell", new Packages.com.google.refine.commands.recon.ReconJudgeOneCellCommand()); + GS.registerCommand(module, "recon-judge-similar-cells", new Packages.com.google.refine.commands.recon.ReconJudgeSimilarCellsCommand()); - GS.registerCommand(module, "annotate-one-row", new Packages.com.google.gridworks.commands.row.AnnotateOneRowCommand()); - GS.registerCommand(module, "annotate-rows", new 
Packages.com.google.gridworks.commands.row.AnnotateRowsCommand()); - GS.registerCommand(module, "remove-rows", new Packages.com.google.gridworks.commands.row.RemoveRowsCommand()); - GS.registerCommand(module, "reorder-rows", new Packages.com.google.gridworks.commands.row.ReorderRowsCommand()); + GS.registerCommand(module, "annotate-one-row", new Packages.com.google.refine.commands.row.AnnotateOneRowCommand()); + GS.registerCommand(module, "annotate-rows", new Packages.com.google.refine.commands.row.AnnotateRowsCommand()); + GS.registerCommand(module, "remove-rows", new Packages.com.google.refine.commands.row.RemoveRowsCommand()); + GS.registerCommand(module, "reorder-rows", new Packages.com.google.refine.commands.row.ReorderRowsCommand()); - GS.registerCommand(module, "save-protograph", new Packages.com.google.gridworks.commands.freebase.SaveProtographCommand()); + GS.registerCommand(module, "save-protograph", new Packages.com.google.refine.commands.freebase.SaveProtographCommand()); - GS.registerCommand(module, "get-expression-language-info", new Packages.com.google.gridworks.commands.expr.GetExpressionLanguageInfoCommand()); - GS.registerCommand(module, "get-expression-history", new Packages.com.google.gridworks.commands.expr.GetExpressionHistoryCommand()); - GS.registerCommand(module, "log-expression", new Packages.com.google.gridworks.commands.expr.LogExpressionCommand()); + GS.registerCommand(module, "get-expression-language-info", new Packages.com.google.refine.commands.expr.GetExpressionLanguageInfoCommand()); + GS.registerCommand(module, "get-expression-history", new Packages.com.google.refine.commands.expr.GetExpressionHistoryCommand()); + GS.registerCommand(module, "log-expression", new Packages.com.google.refine.commands.expr.LogExpressionCommand()); - GS.registerCommand(module, "preview-expression", new Packages.com.google.gridworks.commands.expr.PreviewExpressionCommand()); - GS.registerCommand(module, "preview-extend-data", new 
Packages.com.google.gridworks.commands.column.PreviewExtendDataCommand()); - GS.registerCommand(module, "preview-protograph", new Packages.com.google.gridworks.commands.freebase.PreviewProtographCommand()); + GS.registerCommand(module, "preview-expression", new Packages.com.google.refine.commands.expr.PreviewExpressionCommand()); + GS.registerCommand(module, "preview-extend-data", new Packages.com.google.refine.commands.column.PreviewExtendDataCommand()); + GS.registerCommand(module, "preview-protograph", new Packages.com.google.refine.commands.freebase.PreviewProtographCommand()); - GS.registerCommand(module, "guess-types-of-column", new Packages.com.google.gridworks.commands.freebase.GuessTypesOfColumnCommand()); + GS.registerCommand(module, "guess-types-of-column", new Packages.com.google.refine.commands.freebase.GuessTypesOfColumnCommand()); - GS.registerCommand(module, "check-authorization", new Packages.com.google.gridworks.commands.auth.CheckAuthorizationCommand()); - GS.registerCommand(module, "authorize", new Packages.com.google.gridworks.commands.auth.AuthorizeCommand()); - GS.registerCommand(module, "deauthorize", new Packages.com.google.gridworks.commands.auth.DeAuthorizeCommand()); - GS.registerCommand(module, "user-badges", new Packages.com.google.gridworks.commands.auth.GetUserBadgesCommand()); + GS.registerCommand(module, "check-authorization", new Packages.com.google.refine.commands.auth.CheckAuthorizationCommand()); + GS.registerCommand(module, "authorize", new Packages.com.google.refine.commands.auth.AuthorizeCommand()); + GS.registerCommand(module, "deauthorize", new Packages.com.google.refine.commands.auth.DeAuthorizeCommand()); + GS.registerCommand(module, "user-badges", new Packages.com.google.refine.commands.auth.GetUserBadgesCommand()); - GS.registerCommand(module, "upload-data", new Packages.com.google.gridworks.commands.freebase.UploadDataCommand()); - GS.registerCommand(module, "import-qa-data", new 
Packages.com.google.gridworks.commands.freebase.ImportQADataCommand()); - GS.registerCommand(module, "mqlread", new Packages.com.google.gridworks.commands.freebase.MQLReadCommand()); - GS.registerCommand(module, "mqlwrite", new Packages.com.google.gridworks.commands.freebase.MQLWriteCommand()); + GS.registerCommand(module, "upload-data", new Packages.com.google.refine.commands.freebase.UploadDataCommand()); + GS.registerCommand(module, "import-qa-data", new Packages.com.google.refine.commands.freebase.ImportQADataCommand()); + GS.registerCommand(module, "mqlread", new Packages.com.google.refine.commands.freebase.MQLReadCommand()); + GS.registerCommand(module, "mqlwrite", new Packages.com.google.refine.commands.freebase.MQLWriteCommand()); - GS.registerCommand(module, "get-preference", new Packages.com.google.gridworks.commands.GetPreferenceCommand()); - GS.registerCommand(module, "get-all-preferences", new Packages.com.google.gridworks.commands.GetAllPreferencesCommand()); - GS.registerCommand(module, "set-preference", new Packages.com.google.gridworks.commands.SetPreferenceCommand()); - GS.registerCommand(module, "open-workspace-dir", new Packages.com.google.gridworks.commands.OpenWorkspaceDirCommand()); + GS.registerCommand(module, "get-preference", new Packages.com.google.refine.commands.GetPreferenceCommand()); + GS.registerCommand(module, "get-all-preferences", new Packages.com.google.refine.commands.GetAllPreferencesCommand()); + GS.registerCommand(module, "set-preference", new Packages.com.google.refine.commands.SetPreferenceCommand()); + GS.registerCommand(module, "open-workspace-dir", new Packages.com.google.refine.commands.OpenWorkspaceDirCommand()); } function registerOperations() { - var OR = Packages.com.google.gridworks.operations.OperationRegistry; + var OR = Packages.com.google.refine.operations.OperationRegistry; - OR.registerOperation(module, "text-transform", Packages.com.google.gridworks.operations.cell.TextTransformOperation); - 
OR.registerOperation(module, "mass-edit", Packages.com.google.gridworks.operations.cell.MassEditOperation); + OR.registerOperation(module, "text-transform", Packages.com.google.refine.operations.cell.TextTransformOperation); + OR.registerOperation(module, "mass-edit", Packages.com.google.refine.operations.cell.MassEditOperation); - OR.registerOperation(module, "multivalued-cell-join", Packages.com.google.gridworks.operations.cell.MultiValuedCellJoinOperation); - OR.registerOperation(module, "multivalued-cell-split", Packages.com.google.gridworks.operations.cell.MultiValuedCellSplitOperation); - OR.registerOperation(module, "fill-down", Packages.com.google.gridworks.operations.cell.FillDownOperation); - OR.registerOperation(module, "blank-down", Packages.com.google.gridworks.operations.cell.BlankDownOperation); - OR.registerOperation(module, "transpose-columns-into-rows", Packages.com.google.gridworks.operations.cell.TransposeColumnsIntoRowsOperation); - OR.registerOperation(module, "transpose-rows-into-columns", Packages.com.google.gridworks.operations.cell.TransposeRowsIntoColumnsOperation); + OR.registerOperation(module, "multivalued-cell-join", Packages.com.google.refine.operations.cell.MultiValuedCellJoinOperation); + OR.registerOperation(module, "multivalued-cell-split", Packages.com.google.refine.operations.cell.MultiValuedCellSplitOperation); + OR.registerOperation(module, "fill-down", Packages.com.google.refine.operations.cell.FillDownOperation); + OR.registerOperation(module, "blank-down", Packages.com.google.refine.operations.cell.BlankDownOperation); + OR.registerOperation(module, "transpose-columns-into-rows", Packages.com.google.refine.operations.cell.TransposeColumnsIntoRowsOperation); + OR.registerOperation(module, "transpose-rows-into-columns", Packages.com.google.refine.operations.cell.TransposeRowsIntoColumnsOperation); - OR.registerOperation(module, "column-addition", Packages.com.google.gridworks.operations.column.ColumnAdditionOperation); - 
OR.registerOperation(module, "column-removal", Packages.com.google.gridworks.operations.column.ColumnRemovalOperation); - OR.registerOperation(module, "column-rename", Packages.com.google.gridworks.operations.column.ColumnRenameOperation); - OR.registerOperation(module, "column-move", Packages.com.google.gridworks.operations.column.ColumnMoveOperation); - OR.registerOperation(module, "column-split", Packages.com.google.gridworks.operations.column.ColumnSplitOperation); - OR.registerOperation(module, "extend-data", Packages.com.google.gridworks.operations.column.ExtendDataOperation); - OR.registerOperation(module, "column-addition-by-fetching-urls", Packages.com.google.gridworks.operations.column.ColumnAdditionByFetchingURLsOperation); - OR.registerOperation(module, "column-reorder", Packages.com.google.gridworks.operations.column.ColumnReorderOperation); + OR.registerOperation(module, "column-addition", Packages.com.google.refine.operations.column.ColumnAdditionOperation); + OR.registerOperation(module, "column-removal", Packages.com.google.refine.operations.column.ColumnRemovalOperation); + OR.registerOperation(module, "column-rename", Packages.com.google.refine.operations.column.ColumnRenameOperation); + OR.registerOperation(module, "column-move", Packages.com.google.refine.operations.column.ColumnMoveOperation); + OR.registerOperation(module, "column-split", Packages.com.google.refine.operations.column.ColumnSplitOperation); + OR.registerOperation(module, "extend-data", Packages.com.google.refine.operations.column.ExtendDataOperation); + OR.registerOperation(module, "column-addition-by-fetching-urls", Packages.com.google.refine.operations.column.ColumnAdditionByFetchingURLsOperation); + OR.registerOperation(module, "column-reorder", Packages.com.google.refine.operations.column.ColumnReorderOperation); - OR.registerOperation(module, "row-removal", Packages.com.google.gridworks.operations.row.RowRemovalOperation); - OR.registerOperation(module, "row-star", 
Packages.com.google.gridworks.operations.row.RowStarOperation); - OR.registerOperation(module, "row-flag", Packages.com.google.gridworks.operations.row.RowFlagOperation); - OR.registerOperation(module, "row-reorder", Packages.com.google.gridworks.operations.row.RowReorderOperation); + OR.registerOperation(module, "row-removal", Packages.com.google.refine.operations.row.RowRemovalOperation); + OR.registerOperation(module, "row-star", Packages.com.google.refine.operations.row.RowStarOperation); + OR.registerOperation(module, "row-flag", Packages.com.google.refine.operations.row.RowFlagOperation); + OR.registerOperation(module, "row-reorder", Packages.com.google.refine.operations.row.RowReorderOperation); - OR.registerOperation(module, "recon", Packages.com.google.gridworks.operations.recon.ReconOperation); - OR.registerOperation(module, "recon-mark-new-topics", Packages.com.google.gridworks.operations.recon.ReconMarkNewTopicsOperation); - OR.registerOperation(module, "recon-match-best-candidates", Packages.com.google.gridworks.operations.recon.ReconMatchBestCandidatesOperation); - OR.registerOperation(module, "recon-discard-judgments", Packages.com.google.gridworks.operations.recon.ReconDiscardJudgmentsOperation); - OR.registerOperation(module, "recon-match-specific-topic-to-cells", Packages.com.google.gridworks.operations.recon.ReconMatchSpecificTopicOperation); - OR.registerOperation(module, "recon-judge-similar-cells", Packages.com.google.gridworks.operations.recon.ReconJudgeSimilarCellsOperation); - OR.registerOperation(module, "import-qa-data", Packages.com.google.gridworks.operations.recon.ImportQADataOperation); + OR.registerOperation(module, "recon", Packages.com.google.refine.operations.recon.ReconOperation); + OR.registerOperation(module, "recon-mark-new-topics", Packages.com.google.refine.operations.recon.ReconMarkNewTopicsOperation); + OR.registerOperation(module, "recon-match-best-candidates", 
Packages.com.google.refine.operations.recon.ReconMatchBestCandidatesOperation); + OR.registerOperation(module, "recon-discard-judgments", Packages.com.google.refine.operations.recon.ReconDiscardJudgmentsOperation); + OR.registerOperation(module, "recon-match-specific-topic-to-cells", Packages.com.google.refine.operations.recon.ReconMatchSpecificTopicOperation); + OR.registerOperation(module, "recon-judge-similar-cells", Packages.com.google.refine.operations.recon.ReconJudgeSimilarCellsOperation); + OR.registerOperation(module, "import-qa-data", Packages.com.google.refine.operations.recon.ImportQADataOperation); - OR.registerOperation(module, "save-protograph", Packages.com.google.gridworks.operations.SaveProtographOperation); + OR.registerOperation(module, "save-protograph", Packages.com.google.refine.operations.SaveProtographOperation); } /* diff --git a/server/IDEs/eclipse/Gridworks.launch b/server/IDEs/eclipse/Gridworks.launch index 3e4a4fd56..f6a20385c 100644 --- a/server/IDEs/eclipse/Gridworks.launch +++ b/server/IDEs/eclipse/Gridworks.launch @@ -1,12 +1,12 @@ - + - + diff --git a/server/src/com/google/gridworks/Configurations.java b/server/src/com/google/gridworks/Configurations.java deleted file mode 100644 index 141472446..000000000 --- a/server/src/com/google/gridworks/Configurations.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.google.gridworks; - -/** - * Centralized configuration facility. - */ -public class Configurations { - - public static String get(final String name) { - return System.getProperty(name); - } - - public static String get(final String name, final String def) { - final String val = get(name); - return (val == null) ? def : val; - } - - public static boolean getBoolean(final String name, final boolean def) { - final String val = get(name); - return (val == null) ? def : Boolean.parseBoolean(val); - } - - public static int getInteger(final String name, final int def) { - final String val = get(name); - try { - return (val == null) ? 
def : Integer.parseInt(val); - } catch (NumberFormatException e) { - throw new RuntimeException("Could not parse '" + val + "' as an integer number.", e); - } - } - - public static float getFloat(final String name, final float def) { - final String val = get(name); - try { - return (val == null) ? def : Float.parseFloat(val); - } catch (NumberFormatException e) { - throw new RuntimeException("Could not parse '" + val + "' as a floating point number.", e); - } - } - -} diff --git a/server/src/com/google/gridworks/Gridworks.java b/server/src/com/google/gridworks/Gridworks.java deleted file mode 100644 index 6f579e55d..000000000 --- a/server/src/com/google/gridworks/Gridworks.java +++ /dev/null @@ -1,445 +0,0 @@ -package com.google.gridworks; - -import java.awt.Desktop; -import java.awt.event.ActionEvent; -import java.awt.event.ActionListener; -import java.io.File; -import java.io.FileFilter; -import java.io.IOException; -import java.lang.reflect.Method; -import java.net.URI; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - -import javax.swing.JFrame; -import javax.swing.JMenu; -import javax.swing.JMenuBar; -import javax.swing.JMenuItem; - -import org.apache.log4j.Level; -import org.mortbay.jetty.Connector; -import org.mortbay.jetty.Server; -import org.mortbay.jetty.bio.SocketConnector; -import org.mortbay.jetty.servlet.ServletHolder; -import org.mortbay.jetty.webapp.WebAppContext; -import org.mortbay.util.Scanner; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.codeberry.jdatapath.DataPath; -import com.codeberry.jdatapath.JDataPathSystem; -import com.google.util.threads.ThreadPoolExecutorAdapter; - -/** - * Main class for Gridworks server application. Starts an instance of the - * Jetty HTTP server / servlet container (inner class Gridworks Server). 
- */ -public class Gridworks { - - static private final String DEFAULT_HOST = "127.0.0.1"; - static private final int DEFAULT_PORT = 3333; - - static private int port; - static private String host; - - final static Logger logger = LoggerFactory.getLogger("gridworks"); - - public static void main(String[] args) throws Exception { - - // tell jetty to use SLF4J for logging instead of its own stuff - System.setProperty("VERBOSE","false"); - System.setProperty("org.mortbay.log.class","org.mortbay.log.Slf4jLog"); - - // tell macosx to keep the menu associated with the screen - System.setProperty("apple.laf.useScreenMenuBar", "true"); - System.setProperty("com.apple.eawt.CocoaComponent.CompatibilityMode", "false"); - - // tell the signpost library to log - //System.setProperty("debug","true"); - - // if not already set, make sure jython can find its python stuff - if (System.getProperty("python.path") == null) { - System.setProperty("python.path","lib/python"); - } - - // set the log verbosity level - org.apache.log4j.Logger.getRootLogger().setLevel(Level.toLevel(Configurations.get("gridworks.verbosity","info"))); - - port = Configurations.getInteger("gridworks.port",DEFAULT_PORT); - host = Configurations.get("gridworks.host",DEFAULT_HOST); - - Gridworks gridworks = new Gridworks(); - - gridworks.init(args); - } - - public void init(String[] args) throws Exception { - - GridworksServer server = new GridworksServer(); - server.init(host,port); - - boolean headless = Configurations.getBoolean("gridworks.headless",false); - if (!headless) { - try { - GridworksClient client = new GridworksClient(); - client.init(host,port); - } catch (Exception e) { - logger.warn("Sorry, some error prevented us from launching the browser for you.\n\n Point your browser to http://" + host + ":" + port + "/ to start using Gridworks."); - } - } - - // hook up the signal handlers - Runtime.getRuntime().addShutdownHook( - new Thread(new ShutdownSignalHandler(server)) - ); - - server.join(); - } 
-} - -/* -------------- Gridworks Server ----------------- */ - -class GridworksServer extends Server { - - final static Logger logger = LoggerFactory.getLogger("gridworks_server"); - - private ThreadPoolExecutor threadPool; - - public void init(String host, int port) throws Exception { - logger.info("Starting Server bound to '" + host + ":" + port + "'"); - - String memory = Configurations.get("gridworks.memory"); - if (memory != null) logger.info("Max memory size: " + memory); - - int maxThreads = Configurations.getInteger("gridworks.queue.size", 30); - int maxQueue = Configurations.getInteger("gridworks.queue.max_size", 300); - long keepAliveTime = Configurations.getInteger("gridworks.queue.idle_time", 60); - - LinkedBlockingQueue queue = new LinkedBlockingQueue(maxQueue); - - threadPool = new ThreadPoolExecutor(maxThreads, maxQueue, keepAliveTime, TimeUnit.SECONDS, queue); - - this.setThreadPool(new ThreadPoolExecutorAdapter(threadPool)); - - Connector connector = new SocketConnector(); - connector.setPort(port); - connector.setHost(host); - connector.setMaxIdleTime(Configurations.getInteger("gridworks.connection.max_idle_time",60000)); - connector.setStatsOn(false); - this.addConnector(connector); - - File webapp = new File(Configurations.get("gridworks.webapp","main/webapp")); - - if (!isWebapp(webapp)) { - webapp = new File("main/webapp"); - if (!isWebapp(webapp)) { - webapp = new File("webapp"); - if (!isWebapp(webapp)) { - logger.warn("Warning: Failed to find web application at '" + webapp.getAbsolutePath() + "'"); - System.exit(-1); - } - } - } - - final String contextPath = Configurations.get("gridworks.context_path","/"); - - logger.info("Initializing context: '" + contextPath + "' from '" + webapp.getAbsolutePath() + "'"); - WebAppContext context = new WebAppContext(webapp.getAbsolutePath(), contextPath); - context.setMaxFormContentSize(1048576); - - this.setHandler(context); - this.setStopAtShutdown(true); - this.setSendServerVersion(true); - - // 
Enable context autoreloading - if (Configurations.getBoolean("gridworks.autoreload",false)) { - scanForUpdates(webapp, context); - } - - // start the server - this.start(); - - configure(context); - } - - @Override - protected void doStop() throws Exception { - try { - // shutdown our scheduled tasks first, if any - if (threadPool != null) threadPool.shutdown(); - - // then let the parent stop - super.doStop(); - } catch (InterruptedException e) { - // ignore - } - } - - static private boolean isWebapp(File dir) { - if (dir == null) return false; - if (!dir.exists() || !dir.canRead()) return false; - File webXml = new File(dir, "WEB-INF/web.xml"); - return webXml.exists() && webXml.canRead(); - } - - static private void scanForUpdates(final File contextRoot, final WebAppContext context) { - List scanList = new ArrayList(); - - scanList.add(new File(contextRoot, "WEB-INF/web.xml")); - findFiles(".class", new File(contextRoot, "WEB-INF/classes"), scanList); - findFiles(".jar", new File(contextRoot, "WEB-INF/lib"), scanList); - - logger.info("Starting autoreloading scanner... 
"); - - Scanner scanner = new Scanner(); - scanner.setScanInterval(Configurations.getInteger("gridworks.scanner.period",1)); - scanner.setScanDirs(scanList); - scanner.setReportExistingFilesOnStartup(false); - - scanner.addListener(new Scanner.BulkListener() { - public void filesChanged(@SuppressWarnings("rawtypes") List changedFiles) { - try { - logger.info("Stopping context: " + contextRoot.getAbsolutePath()); - context.stop(); - - logger.info("Starting context: " + contextRoot.getAbsolutePath()); - context.start(); - - configure(context); - } catch (Exception ex) { - throw new RuntimeException(ex); - } - } - }); - - scanner.start(); - } - - static private void findFiles(final String extension, File baseDir, final Collection found) { - baseDir.listFiles(new FileFilter() { - public boolean accept(File pathname) { - if (pathname.isDirectory()) { - findFiles(extension, pathname, found); - } else if (pathname.getName().endsWith(extension)) { - found.add(pathname); - } - return false; - } - }); - } - - // inject configuration parameters in the servlets - // NOTE: this is done *after* starting the server because jetty might override the init - // parameters if we set them in the webapp context upon reading the web.xml file - static private void configure(WebAppContext context) throws Exception { - ServletHolder servlet = context.getServletHandler().getServlet("gridworks"); - if (servlet != null) { - servlet.setInitParameter("gridworks.data", getDataDir()); - servlet.setInitParameter("butterfly.modules.path", getDataDir() + "/extensions"); - servlet.setInitOrder(1); - servlet.doStart(); - } - - servlet = context.getServletHandler().getServlet("gridworks-broker"); - if (servlet != null) { - servlet.setInitParameter("gridworks.data", getDataDir() + "/broker"); - servlet.setInitParameter("gridworks.development", Configurations.get("gridworks.development","false")); - servlet.setInitOrder(1); - servlet.doStart(); - } - } - - static private String getDataDir() { - - String 
data_dir = Configurations.get("gridworks.data_dir"); - if (data_dir != null) { - return data_dir; - } - - String os = System.getProperty("os.name").toLowerCase(); - if (os.contains("windows")) { - try { - // NOTE(SM): finding the "local data app" in windows from java is actually a PITA - // see http://stackoverflow.com/questions/1198911/how-to-get-local-application-data-folder-in-java - // so we're using a library that uses JNI to ask directly the win32 APIs, - // it's not elegant but it's the safest bet. - - DataPath localDataPath = JDataPathSystem.getLocalSystem().getLocalDataPath("Gridworks"); - File data = new File(fixWindowsUnicodePath(localDataPath.getPath())); - data.mkdirs(); - return data.getAbsolutePath(); - } catch (Error e) { - /* - * The above trick can fail, particularly on a 64-bit OS as the jdatapath.dll - * we include is compiled for 32-bit. In this case, we just have to dig up - * environment variables and try our best to find a user-specific path. - */ - - logger.warn("Failed to use jdatapath to detect user data path: resorting to environment variables"); - - File parentDir = null; - String appData = System.getenv("APPDATA"); - if (appData != null && appData.length() > 0) { - // e.g., C:\Users\[userid]\AppData\Roaming - parentDir = new File(appData); - } else { - String userProfile = System.getenv("USERPROFILE"); - if (userProfile != null && userProfile.length() > 0) { - // e.g., C:\Users\[userid] - parentDir = new File(userProfile); - } - } - - if (parentDir == null) { - parentDir = new File("."); - } - - File data = new File(parentDir, "Gridworks"); - data.mkdirs(); - - return data.getAbsolutePath(); - } - } else if (os.contains("mac os x")) { - // on macosx, use "~/Library/Application Support" - String home = System.getProperty("user.home"); - String data_home = (home != null) ? 
home + "/Library/Application Support/Gridworks" : ".gridworks"; - File data = new File(data_home); - data.mkdirs(); - return data.getAbsolutePath(); - } else { // most likely a UNIX flavor - // start with the XDG environment - // see http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html - String data_home = System.getenv("XDG_DATA_HOME"); - if (data_home == null) { // if not found, default back to ~/.local/share - String home = System.getProperty("user.home"); - if (home == null) home = "."; - data_home = home + "/.local/share"; - } - File data = new File(data_home + "/gridworks"); - data.mkdirs(); - return data.getAbsolutePath(); - } - } - - /** - * For Windows file paths that contain user IDs with non ASCII characters, - * those characters might get replaced with ?. We need to use the environment - * APPDATA value to substitute back the original user ID. - */ - static private String fixWindowsUnicodePath(String path) { - int q = path.indexOf('?'); - if (q < 0) { - return path; - } - int pathSep = path.indexOf(File.separatorChar, q); - - String goodPath = System.getenv("APPDATA"); - if (goodPath == null || goodPath.length() == 0) { - goodPath = System.getenv("USERPROFILE"); - if (!goodPath.endsWith(File.separator)) { - goodPath = goodPath + File.separator; - } - } - - int goodPathSep = goodPath.indexOf(File.separatorChar, q); - - return path.substring(0, q) + goodPath.substring(q, goodPathSep) + path.substring(pathSep); - } - -} - -/* -------------- Gridworks Client ----------------- */ - -class GridworksClient extends JFrame implements ActionListener { - - private static final long serialVersionUID = 7886547342175227132L; - - public static boolean MACOSX = (System.getProperty("os.name").toLowerCase().startsWith("mac os x")); - - private URI uri; - - public void init(String host, int port) throws Exception { - - uri = new URI("http://" + host + ":" + port + "/"); - - if (MACOSX) { - - // for more info on the code found here that is macosx-specific 
see: - // http://developer.apple.com/mac/library/documentation/Java/Conceptual/Java14Development/07-NativePlatformIntegration/NativePlatformIntegration.html - // http://developer.apple.com/mac/library/releasenotes/CrossPlatform/JavaSnowLeopardUpdate1LeopardUpdate6RN/NewandNoteworthy/NewandNoteworthy.html - - JMenuBar mb = new JMenuBar(); - JMenu m = new JMenu("Open"); - JMenuItem mi = new JMenuItem("Open New Gridworks Window..."); - mi.addActionListener(this); - m.add(mi); - mb.add(m); - - Class applicationClass = Class.forName("com.apple.eawt.Application"); - Object macOSXApplication = applicationClass.getConstructor((Class[]) null).newInstance((Object[]) null); - Method setDefaultMenuBar = applicationClass.getDeclaredMethod("setDefaultMenuBar", new Class[] { JMenuBar.class }); - setDefaultMenuBar.invoke(macOSXApplication, new Object[] { mb }); - - // FIXME(SM): this part below doesn't seem to work, I get a NPE but I have *no* idea why, suggestions? - -// PopupMenu dockMenu = new PopupMenu("dock"); -// MenuItem mmi = new MenuItem("Open new Gridworks Window..."); -// mmi.addActionListener(this); -// dockMenu.add(mmi); -// this.add(dockMenu); -// -// Method setDockMenu = applicationClass.getDeclaredMethod("setDockMenu", new Class[] { PopupMenu.class }); -// setDockMenu.invoke(macOSXApplication, new Object[] { dockMenu }); - } - - openBrowser(); - } - - public void actionPerformed(ActionEvent e) { - String item = e.getActionCommand(); - if (item.startsWith("Open")) { - openBrowser(); - } - } - - private void openBrowser() { - try { - Desktop.getDesktop().browse(uri); - } catch (IOException e) { - throw new RuntimeException(e); - } - } -} - -class ShutdownSignalHandler implements Runnable { - - private Server _server; - - public ShutdownSignalHandler(Server server) { - this._server = server; - } - - public void run() { - - // Tell the server we want to try and shutdown gracefully - // this means that the server will stop accepting new connections - // right away but 
it will continue to process the ones that - // are in execution for the given timeout before attempting to stop - // NOTE: this is *not* a blocking method, it just sets a parameter - // that _server.stop() will rely on - _server.setGracefulShutdown(3000); - - try { - _server.stop(); - } catch (Exception e) { - e.printStackTrace(); - System.exit(1); - } - } - -} - \ No newline at end of file diff --git a/server/src/com/google/refine/Configurations.java b/server/src/com/google/refine/Configurations.java new file mode 100644 index 000000000..42945abd6 --- /dev/null +++ b/server/src/com/google/refine/Configurations.java @@ -0,0 +1,40 @@ +package com.google.refine; + +/** + * Centralized configuration facility. + */ +public class Configurations { + + public static String get(final String name) { + return System.getProperty(name); + } + + public static String get(final String name, final String def) { + final String val = get(name); + return (val == null) ? def : val; + } + + public static boolean getBoolean(final String name, final boolean def) { + final String val = get(name); + return (val == null) ? def : Boolean.parseBoolean(val); + } + + public static int getInteger(final String name, final int def) { + final String val = get(name); + try { + return (val == null) ? def : Integer.parseInt(val); + } catch (NumberFormatException e) { + throw new RuntimeException("Could not parse '" + val + "' as an integer number.", e); + } + } + + public static float getFloat(final String name, final float def) { + final String val = get(name); + try { + return (val == null) ? 
def : Float.parseFloat(val); + } catch (NumberFormatException e) { + throw new RuntimeException("Could not parse '" + val + "' as a floating point number.", e); + } + } + +} diff --git a/server/src/com/google/refine/Gridworks.java b/server/src/com/google/refine/Gridworks.java new file mode 100644 index 000000000..19e7a4bf8 --- /dev/null +++ b/server/src/com/google/refine/Gridworks.java @@ -0,0 +1,445 @@ +package com.google.refine; + +import java.awt.Desktop; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.lang.reflect.Method; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import javax.swing.JFrame; +import javax.swing.JMenu; +import javax.swing.JMenuBar; +import javax.swing.JMenuItem; + +import org.apache.log4j.Level; +import org.mortbay.jetty.Connector; +import org.mortbay.jetty.Server; +import org.mortbay.jetty.bio.SocketConnector; +import org.mortbay.jetty.servlet.ServletHolder; +import org.mortbay.jetty.webapp.WebAppContext; +import org.mortbay.util.Scanner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.codeberry.jdatapath.DataPath; +import com.codeberry.jdatapath.JDataPathSystem; +import com.google.util.threads.ThreadPoolExecutorAdapter; + +/** + * Main class for Gridworks server application. Starts an instance of the + * Jetty HTTP server / servlet container (inner class Gridworks Server). 
+ */ +public class Gridworks { + + static private final String DEFAULT_HOST = "127.0.0.1"; + static private final int DEFAULT_PORT = 3333; + + static private int port; + static private String host; + + final static Logger logger = LoggerFactory.getLogger("gridworks"); + + public static void main(String[] args) throws Exception { + + // tell jetty to use SLF4J for logging instead of its own stuff + System.setProperty("VERBOSE","false"); + System.setProperty("org.mortbay.log.class","org.mortbay.log.Slf4jLog"); + + // tell macosx to keep the menu associated with the screen + System.setProperty("apple.laf.useScreenMenuBar", "true"); + System.setProperty("com.apple.eawt.CocoaComponent.CompatibilityMode", "false"); + + // tell the signpost library to log + //System.setProperty("debug","true"); + + // if not already set, make sure jython can find its python stuff + if (System.getProperty("python.path") == null) { + System.setProperty("python.path","lib/python"); + } + + // set the log verbosity level + org.apache.log4j.Logger.getRootLogger().setLevel(Level.toLevel(Configurations.get("gridworks.verbosity","info"))); + + port = Configurations.getInteger("gridworks.port",DEFAULT_PORT); + host = Configurations.get("gridworks.host",DEFAULT_HOST); + + Gridworks gridworks = new Gridworks(); + + gridworks.init(args); + } + + public void init(String[] args) throws Exception { + + GridworksServer server = new GridworksServer(); + server.init(host,port); + + boolean headless = Configurations.getBoolean("gridworks.headless",false); + if (!headless) { + try { + GridworksClient client = new GridworksClient(); + client.init(host,port); + } catch (Exception e) { + logger.warn("Sorry, some error prevented us from launching the browser for you.\n\n Point your browser to http://" + host + ":" + port + "/ to start using Gridworks."); + } + } + + // hook up the signal handlers + Runtime.getRuntime().addShutdownHook( + new Thread(new ShutdownSignalHandler(server)) + ); + + server.join(); + } 
+} + +/* -------------- Gridworks Server ----------------- */ + +class GridworksServer extends Server { + + final static Logger logger = LoggerFactory.getLogger("gridworks_server"); + + private ThreadPoolExecutor threadPool; + + public void init(String host, int port) throws Exception { + logger.info("Starting Server bound to '" + host + ":" + port + "'"); + + String memory = Configurations.get("gridworks.memory"); + if (memory != null) logger.info("Max memory size: " + memory); + + int maxThreads = Configurations.getInteger("gridworks.queue.size", 30); + int maxQueue = Configurations.getInteger("gridworks.queue.max_size", 300); + long keepAliveTime = Configurations.getInteger("gridworks.queue.idle_time", 60); + + LinkedBlockingQueue queue = new LinkedBlockingQueue(maxQueue); + + threadPool = new ThreadPoolExecutor(maxThreads, maxQueue, keepAliveTime, TimeUnit.SECONDS, queue); + + this.setThreadPool(new ThreadPoolExecutorAdapter(threadPool)); + + Connector connector = new SocketConnector(); + connector.setPort(port); + connector.setHost(host); + connector.setMaxIdleTime(Configurations.getInteger("gridworks.connection.max_idle_time",60000)); + connector.setStatsOn(false); + this.addConnector(connector); + + File webapp = new File(Configurations.get("gridworks.webapp","main/webapp")); + + if (!isWebapp(webapp)) { + webapp = new File("main/webapp"); + if (!isWebapp(webapp)) { + webapp = new File("webapp"); + if (!isWebapp(webapp)) { + logger.warn("Warning: Failed to find web application at '" + webapp.getAbsolutePath() + "'"); + System.exit(-1); + } + } + } + + final String contextPath = Configurations.get("gridworks.context_path","/"); + + logger.info("Initializing context: '" + contextPath + "' from '" + webapp.getAbsolutePath() + "'"); + WebAppContext context = new WebAppContext(webapp.getAbsolutePath(), contextPath); + context.setMaxFormContentSize(1048576); + + this.setHandler(context); + this.setStopAtShutdown(true); + this.setSendServerVersion(true); + + // 
Enable context autoreloading + if (Configurations.getBoolean("gridworks.autoreload",false)) { + scanForUpdates(webapp, context); + } + + // start the server + this.start(); + + configure(context); + } + + @Override + protected void doStop() throws Exception { + try { + // shutdown our scheduled tasks first, if any + if (threadPool != null) threadPool.shutdown(); + + // then let the parent stop + super.doStop(); + } catch (InterruptedException e) { + // ignore + } + } + + static private boolean isWebapp(File dir) { + if (dir == null) return false; + if (!dir.exists() || !dir.canRead()) return false; + File webXml = new File(dir, "WEB-INF/web.xml"); + return webXml.exists() && webXml.canRead(); + } + + static private void scanForUpdates(final File contextRoot, final WebAppContext context) { + List scanList = new ArrayList(); + + scanList.add(new File(contextRoot, "WEB-INF/web.xml")); + findFiles(".class", new File(contextRoot, "WEB-INF/classes"), scanList); + findFiles(".jar", new File(contextRoot, "WEB-INF/lib"), scanList); + + logger.info("Starting autoreloading scanner... 
"); + + Scanner scanner = new Scanner(); + scanner.setScanInterval(Configurations.getInteger("gridworks.scanner.period",1)); + scanner.setScanDirs(scanList); + scanner.setReportExistingFilesOnStartup(false); + + scanner.addListener(new Scanner.BulkListener() { + public void filesChanged(@SuppressWarnings("rawtypes") List changedFiles) { + try { + logger.info("Stopping context: " + contextRoot.getAbsolutePath()); + context.stop(); + + logger.info("Starting context: " + contextRoot.getAbsolutePath()); + context.start(); + + configure(context); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + } + }); + + scanner.start(); + } + + static private void findFiles(final String extension, File baseDir, final Collection found) { + baseDir.listFiles(new FileFilter() { + public boolean accept(File pathname) { + if (pathname.isDirectory()) { + findFiles(extension, pathname, found); + } else if (pathname.getName().endsWith(extension)) { + found.add(pathname); + } + return false; + } + }); + } + + // inject configuration parameters in the servlets + // NOTE: this is done *after* starting the server because jetty might override the init + // parameters if we set them in the webapp context upon reading the web.xml file + static private void configure(WebAppContext context) throws Exception { + ServletHolder servlet = context.getServletHandler().getServlet("gridworks"); + if (servlet != null) { + servlet.setInitParameter("gridworks.data", getDataDir()); + servlet.setInitParameter("butterfly.modules.path", getDataDir() + "/extensions"); + servlet.setInitOrder(1); + servlet.doStart(); + } + + servlet = context.getServletHandler().getServlet("gridworks-broker"); + if (servlet != null) { + servlet.setInitParameter("gridworks.data", getDataDir() + "/broker"); + servlet.setInitParameter("gridworks.development", Configurations.get("gridworks.development","false")); + servlet.setInitOrder(1); + servlet.doStart(); + } + } + + static private String getDataDir() { + + String 
data_dir = Configurations.get("gridworks.data_dir"); + if (data_dir != null) { + return data_dir; + } + + String os = System.getProperty("os.name").toLowerCase(); + if (os.contains("windows")) { + try { + // NOTE(SM): finding the "local data app" in windows from java is actually a PITA + // see http://stackoverflow.com/questions/1198911/how-to-get-local-application-data-folder-in-java + // so we're using a library that uses JNI to ask directly the win32 APIs, + // it's not elegant but it's the safest bet. + + DataPath localDataPath = JDataPathSystem.getLocalSystem().getLocalDataPath("Gridworks"); + File data = new File(fixWindowsUnicodePath(localDataPath.getPath())); + data.mkdirs(); + return data.getAbsolutePath(); + } catch (Error e) { + /* + * The above trick can fail, particularly on a 64-bit OS as the jdatapath.dll + * we include is compiled for 32-bit. In this case, we just have to dig up + * environment variables and try our best to find a user-specific path. + */ + + logger.warn("Failed to use jdatapath to detect user data path: resorting to environment variables"); + + File parentDir = null; + String appData = System.getenv("APPDATA"); + if (appData != null && appData.length() > 0) { + // e.g., C:\Users\[userid]\AppData\Roaming + parentDir = new File(appData); + } else { + String userProfile = System.getenv("USERPROFILE"); + if (userProfile != null && userProfile.length() > 0) { + // e.g., C:\Users\[userid] + parentDir = new File(userProfile); + } + } + + if (parentDir == null) { + parentDir = new File("."); + } + + File data = new File(parentDir, "Gridworks"); + data.mkdirs(); + + return data.getAbsolutePath(); + } + } else if (os.contains("mac os x")) { + // on macosx, use "~/Library/Application Support" + String home = System.getProperty("user.home"); + String data_home = (home != null) ? 
home + "/Library/Application Support/Gridworks" : ".gridworks"; + File data = new File(data_home); + data.mkdirs(); + return data.getAbsolutePath(); + } else { // most likely a UNIX flavor + // start with the XDG environment + // see http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html + String data_home = System.getenv("XDG_DATA_HOME"); + if (data_home == null) { // if not found, default back to ~/.local/share + String home = System.getProperty("user.home"); + if (home == null) home = "."; + data_home = home + "/.local/share"; + } + File data = new File(data_home + "/gridworks"); + data.mkdirs(); + return data.getAbsolutePath(); + } + } + + /** + * For Windows file paths that contain user IDs with non ASCII characters, + * those characters might get replaced with ?. We need to use the environment + * APPDATA value to substitute back the original user ID. + */ + static private String fixWindowsUnicodePath(String path) { + int q = path.indexOf('?'); + if (q < 0) { + return path; + } + int pathSep = path.indexOf(File.separatorChar, q); + + String goodPath = System.getenv("APPDATA"); + if (goodPath == null || goodPath.length() == 0) { + goodPath = System.getenv("USERPROFILE"); + if (!goodPath.endsWith(File.separator)) { + goodPath = goodPath + File.separator; + } + } + + int goodPathSep = goodPath.indexOf(File.separatorChar, q); + + return path.substring(0, q) + goodPath.substring(q, goodPathSep) + path.substring(pathSep); + } + +} + +/* -------------- Gridworks Client ----------------- */ + +class GridworksClient extends JFrame implements ActionListener { + + private static final long serialVersionUID = 7886547342175227132L; + + public static boolean MACOSX = (System.getProperty("os.name").toLowerCase().startsWith("mac os x")); + + private URI uri; + + public void init(String host, int port) throws Exception { + + uri = new URI("http://" + host + ":" + port + "/"); + + if (MACOSX) { + + // for more info on the code found here that is macosx-specific 
see: + // http://developer.apple.com/mac/library/documentation/Java/Conceptual/Java14Development/07-NativePlatformIntegration/NativePlatformIntegration.html + // http://developer.apple.com/mac/library/releasenotes/CrossPlatform/JavaSnowLeopardUpdate1LeopardUpdate6RN/NewandNoteworthy/NewandNoteworthy.html + + JMenuBar mb = new JMenuBar(); + JMenu m = new JMenu("Open"); + JMenuItem mi = new JMenuItem("Open New Gridworks Window..."); + mi.addActionListener(this); + m.add(mi); + mb.add(m); + + Class applicationClass = Class.forName("com.apple.eawt.Application"); + Object macOSXApplication = applicationClass.getConstructor((Class[]) null).newInstance((Object[]) null); + Method setDefaultMenuBar = applicationClass.getDeclaredMethod("setDefaultMenuBar", new Class[] { JMenuBar.class }); + setDefaultMenuBar.invoke(macOSXApplication, new Object[] { mb }); + + // FIXME(SM): this part below doesn't seem to work, I get a NPE but I have *no* idea why, suggestions? + +// PopupMenu dockMenu = new PopupMenu("dock"); +// MenuItem mmi = new MenuItem("Open new Gridworks Window..."); +// mmi.addActionListener(this); +// dockMenu.add(mmi); +// this.add(dockMenu); +// +// Method setDockMenu = applicationClass.getDeclaredMethod("setDockMenu", new Class[] { PopupMenu.class }); +// setDockMenu.invoke(macOSXApplication, new Object[] { dockMenu }); + } + + openBrowser(); + } + + public void actionPerformed(ActionEvent e) { + String item = e.getActionCommand(); + if (item.startsWith("Open")) { + openBrowser(); + } + } + + private void openBrowser() { + try { + Desktop.getDesktop().browse(uri); + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} + +class ShutdownSignalHandler implements Runnable { + + private Server _server; + + public ShutdownSignalHandler(Server server) { + this._server = server; + } + + public void run() { + + // Tell the server we want to try and shutdown gracefully + // this means that the server will stop accepting new connections + // right away but 
it will continue to process the ones that + // are in execution for the given timeout before attempting to stop + // NOTE: this is *not* a blocking method, it just sets a parameter + // that _server.stop() will rely on + _server.setGracefulShutdown(3000); + + try { + _server.stop(); + } catch (Exception e) { + e.printStackTrace(); + Runtime.getRuntime().halt(1); // run() executes as a JVM shutdown hook, where System.exit() would block forever; halt() ends the process with the error status + } + } + +} + \ No newline at end of file