Update marc4j to 2.9.1 - Fixes #2962 (#2977)

* Add a MARC import test

* Make sure data directory is a directory, not a file

* Update to marc4j 2.9.1 - fixes #2962
This commit is contained in:
Tom Morris 2020-07-22 16:12:30 -04:00 committed by GitHub
parent c22078b2c9
commit d5abaac6df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 132 additions and 5 deletions

View File

@ -231,9 +231,9 @@
<version>1.5</version> <version>1.5</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.openrefine.dependencies</groupId> <groupId>org.marc4j</groupId>
<artifactId>marc4j</artifactId> <artifactId>marc4j</artifactId>
<version>2.4</version> <version>2.9.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@ -0,0 +1 @@
00762cam 22002658a 4500001001300000003000400013005001700017008004100034010001700075020002400092040001800116050002400134082001600158100003800174245005700212260005800269263000900327300001100336440002500347651002900372650002900401650002600430655002100456700001900477 93032341 DLC20000302171755.0930830s1994 enk 000 0 eng  a 93032341  a0192814591 :c¹4.99 aDLCcDLCdDLC00aPR2801.A2bS66 199400a822.3/32201 aShakespeare, William,d1564-1616.10aAll's well that ends well /cedited by Susan Snyder. aOxford ;aNew York :bOxford University Press,c1994. a9402 ap. cm. 4aThe World's classics 0aFlorence (Italy)xDrama. 0aRunaway husbandsxDrama. 0aMarried womenxDrama. 7aComedies.2gsafd1 aSnyder, Susan.

View File

@ -1,6 +1,7 @@
/* /*
Copyright 2010, Google Inc. Copyright 2010 Google Inc.
Copyright 2019,2020 OpenRefine contributors
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -35,12 +36,13 @@ package com.google.refine;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import javax.servlet.ServletException; import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import com.google.refine.RefineServlet;
import com.google.refine.commands.Command; import com.google.refine.commands.Command;
/** /**
@ -66,7 +68,8 @@ public class RefineServletStub extends RefineServlet {
public File getTempDir() { public File getTempDir() {
if (tempDir == null) { if (tempDir == null) {
try { try {
tempDir = File.createTempFile("refine-test-dir", ""); Path tempDirPath = Files.createTempDirectory("refine-test-dir");
tempDir = tempDirPath.toFile();
tempDir.deleteOnExit(); tempDir.deleteOnExit();
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException("Failed to create temp directory",e); throw new RuntimeException("Failed to create temp directory",e);

View File

@ -0,0 +1,123 @@
/*
Copyright 2020, Thomas F. Morris & OpenRefine contributors
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.google.refine.importers;
import static org.testng.Assert.assertEquals;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.slf4j.LoggerFactory;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.refine.importing.ImportingUtilities;
import com.google.refine.model.Row;
import com.google.refine.util.JSONUtilities;
import com.google.refine.util.ParsingUtilities;
/**
 * Tests for {@link MarcImporter}.
 *
 * <p>Extends {@link XmlImporterTests} and relies on members that are not declared here
 * ({@code logger}, {@code job}, {@code project}, {@code parseOneFile}), so they must be
 * supplied by the superclass hierarchy.
 */
public class MarcImporterTests extends XmlImporterTests {

    /** Installs a logger named after this concrete test class. */
    @Override
    @BeforeTest
    public void init() {
        logger = LoggerFactory.getLogger(this.getClass());
    }

    // System Under Test
    MarcImporter SUT = null;

    /** Runs the inherited set-up, then creates a fresh importer for each test method. */
    @Override
    @BeforeMethod
    public void setUp() {
        super.setUp();
        SUT = new MarcImporter();
    }

    /** Drops the importer reference, then runs the inherited tear-down. */
    @Override
    @AfterMethod
    public void tearDown() {
        SUT = null;
        super.tearDown();
    }

    /**
     * Imports the binary MARC resource {@code scriblio.mrc} and checks the resulting rows,
     * including a cell whose value contains a non-ASCII pound sign.
     *
     * @throws FileNotFoundException if the file to be parsed cannot be opened
     * @throws IOException if copying the resource or closing the input stream fails
     */
    @Test
    public void readMarcFileWithUnicode() throws FileNotFoundException, IOException {
        final String FILE = "scriblio.mrc";
        // NOTE(review): URL.getPath() keeps percent-encoding, so this lookup presumably breaks
        // when the resource lives under a path containing spaces -- confirm and consider toURI().
        String filename = ClassLoader.getSystemResource(FILE).getPath();
        // File is assumed to be in job.getRawDataDir(), so copy it there
        FileUtils.copyFile(new File(filename), new File(job.getRawDataDir(), FILE));

        List<ObjectNode> fileRecords = new ArrayList<>();
        fileRecords.add(ParsingUtilities.evaluateJsonStringToObjectNode(
                String.format("{\"location\": \"%s\"}", FILE)));

        // NOTE: This has the side effect of creating scriblio.mrc.xml
        ObjectNode options = SUT.createParserUIInitializationData(
                job, fileRecords, "binary/marc");

        ArrayNode path = ParsingUtilities.mapper.createArrayNode();
        JSONUtilities.append(path, "marc:collection");
        JSONUtilities.append(path, "marc:record");
        JSONUtilities.safePut(options, "recordPath", path);
        JSONUtilities.safePut(options, "trimStrings", true);
        JSONUtilities.safePut(options, "storeEmptyStrings", false);

        File xmlFile = ImportingUtilities.getFile(job, fileRecords.get(0));
        // Close the stream deterministically: a leaked handle can keep the file locked
        // (notably on Windows) and get in the way of cleaning up the temp directory.
        try (InputStream inputStream = new FileInputStream(xmlFile)) {
            parseOneFile(SUT, inputStream, options);
        }

        assertEquals(project.rows.size(), 30);
        assertEquals(project.rows.get(1).cells.size(), 6);

        Row r0 = project.rows.get(0);
        assertEquals(r0.getCellValue(1), "001");
        assertEquals(r0.getCellValue(3), "010");

        assertEquals(project.rows.get(1).getCellValue(1), "003");
        assertEquals(project.rows.get(1).getCellValue(2), "DLC");

        Row r2 = project.rows.get(2);
        assertEquals(r2.getCellValue(1), "005");
        // The non-ASCII pound sign must come through the import intact.
        assertEquals(r2.getCellValue(5), "£4.99");

        assertEquals(project.rows.get(29).getCellValue(3),"700");
    }
}