From d5abaac6dfee2a29d3a16982c7403d4448cc3713 Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Wed, 22 Jul 2020 16:12:30 -0400 Subject: [PATCH] Update marc4j to 2.9.1 - Fixes #2962 (#2977) * Add a MARC import test * Make sure data directory is directory, not a file * Update to marc4j 2.9.1 - fixes #2962 --- main/pom.xml | 4 +- main/tests/data/scriblio.mrc | 1 + .../com/google/refine/RefineServletStub.java | 9 +- .../refine/importers/MarcImporterTests.java | 123 ++++++++++++++++++ 4 files changed, 132 insertions(+), 5 deletions(-) create mode 100644 main/tests/data/scriblio.mrc create mode 100644 main/tests/server/src/com/google/refine/importers/MarcImporterTests.java diff --git a/main/pom.xml b/main/pom.xml index 540c88ccb..5589c8abb 100644 --- a/main/pom.xml +++ b/main/pom.xml @@ -231,9 +231,9 @@ 1.5 - org.openrefine.dependencies + org.marc4j marc4j - 2.4 + 2.9.1 org.apache.commons diff --git a/main/tests/data/scriblio.mrc b/main/tests/data/scriblio.mrc new file mode 100644 index 000000000..a9f76e604 --- /dev/null +++ b/main/tests/data/scriblio.mrc @@ -0,0 +1 @@ +00762cam 22002658a 4500001001300000003000400013005001700017008004100034010001700075020002400092040001800116050002400134082001600158100003800174245005700212260005800269263000900327300001100336440002500347651002900372650002900401650002600430655002100456700001900477 93032341 DLC20000302171755.0930830s1994 enk 000 0 eng  a 93032341  a0192814591 :c¹4.99 aDLCcDLCdDLC00aPR2801.A2bS66 199400a822.3/32201 aShakespeare, William,d1564-1616.10aAll's well that ends well /cedited by Susan Snyder. aOxford ;aNew York :bOxford University Press,c1994. a9402 ap. cm. 4aThe World's classics 0aFlorence (Italy)xDrama. 0aRunaway husbandsxDrama. 0aMarried womenxDrama. 7aComedies.2gsafd1 aSnyder, Susan. 
\ No newline at end of file diff --git a/main/tests/server/src/com/google/refine/RefineServletStub.java b/main/tests/server/src/com/google/refine/RefineServletStub.java index 1049616bf..788006a99 100644 --- a/main/tests/server/src/com/google/refine/RefineServletStub.java +++ b/main/tests/server/src/com/google/refine/RefineServletStub.java @@ -1,6 +1,7 @@ /* -Copyright 2010, Google Inc. +Copyright 2010 Google Inc. +Copyright 2019,2020 OpenRefine contributors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -35,12 +36,13 @@ package com.google.refine; import java.io.File; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import com.google.refine.RefineServlet; import com.google.refine.commands.Command; /** @@ -66,7 +68,8 @@ public class RefineServletStub extends RefineServlet { public File getTempDir() { if (tempDir == null) { try { - tempDir = File.createTempFile("refine-test-dir", ""); + Path tempDirPath = Files.createTempDirectory("refine-test-dir"); + tempDir = tempDirPath.toFile(); tempDir.deleteOnExit(); } catch (IOException e) { throw new RuntimeException("Failed to create temp directory",e); diff --git a/main/tests/server/src/com/google/refine/importers/MarcImporterTests.java b/main/tests/server/src/com/google/refine/importers/MarcImporterTests.java new file mode 100644 index 000000000..0da847e8d --- /dev/null +++ b/main/tests/server/src/com/google/refine/importers/MarcImporterTests.java @@ -0,0 +1,123 @@ +/* + +Copyright 2020, Thomas F. Morris & OpenRefine contributors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+package com.google.refine.importers;
+
+import static org.testng.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.io.FileUtils;
+import org.slf4j.LoggerFactory;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.Test;
+
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.google.refine.importing.ImportingUtilities;
+import com.google.refine.model.Row;
+import com.google.refine.util.JSONUtilities;
+import com.google.refine.util.ParsingUtilities;
+
+/** Tests for {@link MarcImporter}; set-up and tear-down are inherited from {@link XmlImporterTests}. */
+public class MarcImporterTests extends XmlImporterTests {
+    @Override
+    @BeforeTest
+    public void init() {
+        logger = LoggerFactory.getLogger(this.getClass());
+    }
+
+    // System Under Test
+    MarcImporter SUT = null;
+
+    @Override
+    @BeforeMethod
+    public void setUp() {
+        super.setUp();
+        SUT = new MarcImporter();
+    }
+
+    @Override
+    @AfterMethod
+    public void tearDown() {
+        SUT = null;
+        super.tearDown();
+    }
+
+    /**
+     * Imports the scriblio.mrc fixture and checks row/cell counts and selected cell values, including "£4.99".
+     */
+    @Test
+    public void readMarcFileWithUnicode() throws FileNotFoundException, IOException {
+        final String FILE = "scriblio.mrc";
+        String filename = ClassLoader.getSystemResource(FILE).getPath();
+        // File is assumed to be in job.getRawDataDir(), so copy it there
+        FileUtils.copyFile(new File(filename), new File(job.getRawDataDir(), FILE));
+        List<ObjectNode> fileRecords = new ArrayList<>();
+        fileRecords.add(ParsingUtilities.evaluateJsonStringToObjectNode(String.format("{\"location\": \"%s\"}", FILE)));
+
+        // NOTE: This has the side effect of creating scriblio.mrc.xml
+        ObjectNode options = SUT.createParserUIInitializationData(job, fileRecords, "binary/marc");
+
+        ArrayNode path = ParsingUtilities.mapper.createArrayNode();
+        JSONUtilities.append(path, "marc:collection");
+        JSONUtilities.append(path, "marc:record");
+        JSONUtilities.safePut(options, "recordPath", path);
+        JSONUtilities.safePut(options, "trimStrings", true);
+        JSONUtilities.safePut(options, "storeEmptyStrings", false);
+
+        File xmlFile = ImportingUtilities.getFile(job, fileRecords.get(0));
+        try (InputStream inputStream = new FileInputStream(xmlFile)) { // close the stream even if parsing throws
+            parseOneFile(SUT, inputStream, options);
+        }
+        assertEquals(project.rows.size(), 30);
+        assertEquals(project.rows.get(1).cells.size(), 6);
+
+        Row r0 = project.rows.get(0);
+        assertEquals(r0.getCellValue(1), "001");
+        assertEquals(r0.getCellValue(3), "010");
+        assertEquals(project.rows.get(1).getCellValue(1), "003");
+        assertEquals(project.rows.get(1).getCellValue(2), "DLC");
+        Row r2 = project.rows.get(2);
+        assertEquals(r2.getCellValue(1), "005");
+        assertEquals(r2.getCellValue(5), "£4.99");
+        assertEquals(project.rows.get(29).getCellValue(3),"700");
+    }
+}