Fix: Preventing addition of any empty cells with whitespaces while importing Xml Data with Tests #1095 (#3357)

* Fix: Preventing addition of any empty cells with whitespaces while importing Xml data with Tests : Issue #1095

* Chore: Using 'CharMatcher' to match whitespace pattern instead of using custom regex : Issue #1095
This commit is contained in:
Mahesh Jindal 2020-12-02 22:41:45 +05:30 committed by GitHub
parent 6edfda79a3
commit 4f97fd55a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 686 additions and 2 deletions

View File

@ -46,6 +46,7 @@ import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import com.google.common.base.CharMatcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -209,7 +210,8 @@ public class XmlImporter extends TreeImportingParserBase {
static public class XmlParser implements TreeReader {
final protected XMLStreamReader parser;
static final int WHITESPACE_CHARACTERS_TOKEN = 15;
public XmlParser(InputStream inputStream) throws XMLStreamException, IOException {
parser = createXMLStreamReader(inputStream);
}
@ -230,7 +232,15 @@ public class XmlImporter extends TreeImportingParserBase {
} catch (XMLStreamException e) {
throw new TreeReaderException(e);
}
// Issue #1095: prevent whitespace-only text from being added to the table as empty cells.
// Whitespace between tags is reported by the parser as CHARACTERS by default,
// so replace the token with WHITESPACE_CHARACTERS_TOKEN when the text is non-empty and all whitespace.
if (currentToken == XMLStreamConstants.CHARACTERS) {
String text = parser.getText();
if (!text.isEmpty() && CharMatcher.whitespace().matchesAllOf(text)) {
currentToken = WHITESPACE_CHARACTERS_TOKEN;
}
}
return mapToToken(currentToken);
}
@ -250,6 +260,7 @@ public class XmlImporter extends TreeImportingParserBase {
case XMLStreamConstants.COMMENT: return Token.Ignorable;
case XMLStreamConstants.CDATA: return Token.Ignorable;
case XMLStreamConstants.ATTRIBUTE: return Token.Ignorable;
case WHITESPACE_CHARACTERS_TOKEN: return Token.Ignorable;
default:
return Token.Ignorable;
}

View File

@ -0,0 +1,24 @@
<?xml version = "1.0"?>
<library>
<book id="1">
<author>
<author-name>author1</author-name>
<author-dob>a date</author-dob>
</author>
<genre>genre1</genre>
</book>
<book id="2">
<author>
<author-name>author2</author-name>
<author-dob>a date2</author-dob>
</author>
<genre>genre2</genre>
</book>
<book id="3">
<author>
<author-name>author3</author-name>
<author-dob>a date3</author-dob>
</author>
<genre>genre3</genre>
</book>
</library>

View File

@ -0,0 +1,24 @@
<?xml version = "1.0"?>
<library >
<book id = "1">
<author >
<author-name >author1</author-name >
<author-dob >a date</author-dob >
</author >
<genre >genre1</genre >
</book >
<book id = "2">
<author >
<author-name >author2</author-name >
<author-dob >a date2</author-dob >
</author >
<genre >genre2</genre >
</book >
<book id = "3">
<author >
<author-name >author3</author-name >
<author-dob >a date3</author-dob >
</author >
<genre >genre3</genre >
</book >
</library >

View File

@ -0,0 +1,24 @@
<?xml version = "1.0"?>
<library>
<book id=" 1 ">
<author>
<author-name> author1 </author-name>
<author-dob> a date </author-dob>
</author>
<genre> genre1 </genre>
</book>
<book id=" 2 ">
<author>
<author-name> author2 </author-name>
<author-dob> a date2 </author-dob>
</author>
<genre> genre2 </genre>
</book>
<book id=" 3 ">
<author>
<author-name> author3 </author-name>
<author-dob> a date3 </author-dob>
</author>
<genre> genre3 </genre>
</book>
</library>

View File

@ -0,0 +1,503 @@
<wb:data xmlns:wb="http://www.worldbank.org" page="1" pages="11" per_page="50" total="528" sourceid="2" lastupdated="2020-10-15">
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="1A">Arab World</wb:country>
<wb:countryiso3code>ARB</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>288432163</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="1A">Arab World</wb:country>
<wb:countryiso3code>ARB</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>282344154</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="S3">Caribbean small states</wb:country>
<wb:countryiso3code>CSS</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>6559096</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="S3">Caribbean small states</wb:country>
<wb:countryiso3code>CSS</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>6513485</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="B8">Central Europe and the Baltics</wb:country>
<wb:countryiso3code>CEB</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>107660041</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="B8">Central Europe and the Baltics</wb:country>
<wb:countryiso3code>CEB</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>108447824</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="V2">Early-demographic dividend</wb:country>
<wb:countryiso3code>EAR</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>2516662236</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="V2">Early-demographic dividend</wb:country>
<wb:countryiso3code>EAR</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>2472852823</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="Z4">East Asia and Pacific</wb:country>
<wb:countryiso3code>EAS</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>2065912076</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="Z4">East Asia and Pacific</wb:country>
<wb:countryiso3code>EAS</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>2047640119</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="4E">East Asia and Pacific (excluding high income)</wb:country>
<wb:countryiso3code>EAP</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>1833423014</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="4E">East Asia and Pacific (excluding high income)</wb:country>
<wb:countryiso3code>EAP</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>1816455805</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="T4">East Asia and Pacific (IDA and IBRD countries)</wb:country>
<wb:countryiso3code>TEA</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>1810261141</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="T4">East Asia and Pacific (IDA and IBRD countries)</wb:country>
<wb:countryiso3code>TEA</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>1793498351</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XC">Euro area</wb:country>
<wb:countryiso3code>EMU</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>322547874</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XC">Euro area</wb:country>
<wb:countryiso3code>EMU</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>321310791</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="Z7">Europe and Central Asia</wb:country>
<wb:countryiso3code>ECS</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>862347940</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="Z7">Europe and Central Asia</wb:country>
<wb:countryiso3code>ECS</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>861278548</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="7E">Europe and Central Asia (excluding high income)</wb:country>
<wb:countryiso3code>ECA</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>369183312</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="7E">Europe and Central Asia (excluding high income)</wb:country>
<wb:countryiso3code>ECA</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>369143668</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="T7">Europe and Central Asia (IDA and IBRD countries)</wb:country>
<wb:countryiso3code>TEC</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>433863000</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="T7">Europe and Central Asia (IDA and IBRD countries)</wb:country>
<wb:countryiso3code>TEC</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>434313570</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="EU">European Union</wb:country>
<wb:countryiso3code>EUU</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>429895628</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="EU">European Union</wb:country>
<wb:countryiso3code>EUU</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>429328624</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="F1">Fragile and conflict affected situations</wb:country>
<wb:countryiso3code>FCS</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>517162716</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="F1">Fragile and conflict affected situations</wb:country>
<wb:countryiso3code>FCS</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>504450718</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XE">Heavily indebted poor countries (HIPC)</wb:country>
<wb:countryiso3code>HPC</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>485112686</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XE">Heavily indebted poor countries (HIPC)</wb:country>
<wb:countryiso3code>HPC</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>471680794</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XD">High income</wb:country>
<wb:countryiso3code/>
<wb:date>2001</wb:date>
<wb:value>1108227429</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XD">High income</wb:country>
<wb:countryiso3code/>
<wb:date>2000</wb:date>
<wb:value>1101479757</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XF">IBRD only</wb:country>
<wb:countryiso3code>IBD</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>4032822516</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XF">IBRD only</wb:country>
<wb:countryiso3code>IBD</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>3987195304</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="ZT">IDA and IBRD total</wb:country>
<wb:countryiso3code>IBT</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>5137401888</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="ZT">IDA and IBRD total</wb:country>
<wb:countryiso3code>IBT</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>5065364308</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XH">IDA blend</wb:country>
<wb:countryiso3code>IDB</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>368820681</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XH">IDA blend</wb:country>
<wb:countryiso3code>IDB</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>360173360</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XI">IDA only</wb:country>
<wb:countryiso3code>IDX</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>735758691</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XI">IDA only</wb:country>
<wb:countryiso3code>IDX</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>717995644</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XG">IDA total</wb:country>
<wb:countryiso3code>IDA</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>1104579372</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XG">IDA total</wb:country>
<wb:countryiso3code>IDA</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>1078169004</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="V3">Late-demographic dividend</wb:country>
<wb:countryiso3code>LTE</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>2059873511</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="V3">Late-demographic dividend</wb:country>
<wb:countryiso3code>LTE</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>2045125926</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="ZJ">Latin America and Caribbean</wb:country>
<wb:countryiso3code>LCN</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>528283173</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="ZJ">Latin America and Caribbean</wb:country>
<wb:countryiso3code>LCN</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>520903449</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XJ">Latin America and Caribbean (excluding high income)</wb:country>
<wb:countryiso3code>LAC</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>500087474</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XJ">Latin America and Caribbean (excluding high income)</wb:country>
<wb:countryiso3code>LAC</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>492968031</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="T2">Latin America and the Caribbean (IDA and IBRD countries)</wb:country>
<wb:countryiso3code>TLA</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>512247484</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="T2">Latin America and the Caribbean (IDA and IBRD countries)</wb:country>
<wb:countryiso3code>TLA</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>504921261</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XL">Least developed countries: UN classification</wb:country>
<wb:countryiso3code>LDC</wb:countryiso3code>
<wb:date>2001</wb:date>
<wb:value>673903112</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
<wb:data>
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
<wb:country id="XL">Least developed countries: UN classification</wb:country>
<wb:countryiso3code>LDC</wb:countryiso3code>
<wb:date>2000</wb:date>
<wb:value>657215864</wb:value>
<wb:unit/>
<wb:obs_status/>
<wb:decimal>0</wb:decimal>
</wb:data>
</wb:data>

View File

@ -35,6 +35,7 @@ package com.google.refine.importers;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
@ -553,6 +554,96 @@ public class XmlImportUtilitiesTests extends RefineTest {
}
/**
 * Parses {@code xml-sample-format-1.xml} and verifies the resulting records:
 * three rows, four cells in the first row, and the expected 'author-name'
 * and 'author-dob' values in cells 1 and 2.
 * <p>
 * Regression test for Issue #1095 (opening an XML file from a URL generates lots of empty lines).
 */
@Test
public void processRecordsFromXmlWithWhiteSpacesBeforeTagsTest() throws IOException {
    final String xmlContent = _getXmlDataFromFile("xml-sample-format-1.xml");
    loadData(xmlContent);
    createXmlParser();
    ParserSkip();

    try {
        SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
    } catch (Exception parseFailure) {
        Assert.fail("Failed to parse records from the given XML Data. Reason: " + parseFailure.getMessage(),
                parseFailure);
    }

    // Record-level expectations.
    Assert.assertNotNull(project.rows, "Checks the record count of project");
    final int recordCount = project.rows.size();
    Assert.assertEquals(recordCount, 3, "Checks the number of records parsed from Xml");

    // Cell-level expectations on the first record.
    final Row firstRow = project.rows.get(0);
    Assert.assertNotNull(firstRow, "Checks the row instance with index '0'");
    final int cellCount = firstRow.cells.size();
    Assert.assertEquals(cellCount, 4, "Checks the row cells count");

    Assert.assertNotNull(firstRow.getCell(1), "Checks the cell instance at index '1'");
    Assert.assertEquals(firstRow.getCell(1).value, "author1", "Checks the value for 'author-name'");

    Assert.assertNotNull(firstRow.getCell(2), "Checks the cell instance at index '2'");
    Assert.assertEquals(firstRow.getCell(2).value, "a date", "Checks the value for 'author-dob'");
}
/**
 * Parses {@code xml-sample-format-2.xml} and verifies the resulting records:
 * three rows, four cells in the first row, and the values "author1" and
 * "a date" in cells 1 and 2.
 */
@Test
public void processRecordsFromComplexXmlWithTagsHavingWhitespaces() throws IOException {
    final String xmlContent = _getXmlDataFromFile("xml-sample-format-2.xml");
    loadData(xmlContent);
    createXmlParser();
    ParserSkip();

    try {
        SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
    } catch (Exception parseFailure) {
        Assert.fail("Failed to parse records from the given XML Data. Reason: " + parseFailure.getMessage(),
                parseFailure);
    }

    // Record-level expectations.
    Assert.assertNotNull(project.rows, "Checks the record count of project");
    final int recordCount = project.rows.size();
    Assert.assertEquals(recordCount, 3, "Checks the number of records parsed from Xml");

    // Cell-level expectations on the first record.
    final Row firstRow = project.rows.get(0);
    Assert.assertNotNull(firstRow, "Checks the row instance with index '0'");
    final int cellCount = firstRow.cells.size();
    Assert.assertEquals(cellCount, 4, "Checks the row cells count");

    Assert.assertNotNull(firstRow.getCell(1), "Checks the cell instance at index '1'");
    Assert.assertEquals(firstRow.getCell(1).value, "author1", "Checks the value for first item");

    Assert.assertNotNull(firstRow.getCell(2), "Checks the cell instance at index '2'");
    Assert.assertEquals(firstRow.getCell(2).value, "a date", "Checks the value for 'author-dob'");
}
/**
 * Parses {@code xml-sample-format-3.xml} and verifies the resulting records:
 * three rows and four cells in the first row. The values in cells 1 and 2 are
 * compared through fixed substrings ({@code [2, 9)} and {@code [2, 8)}) of
 * their string form, which must equal "author1" and "a date" respectively.
 */
@Test
public void processRecordsFromXMLWithDataHavingWhitespaces() throws IOException {
    final String xmlContent = _getXmlDataFromFile("xml-sample-format-3.xml");
    loadData(xmlContent);
    createXmlParser();
    ParserSkip();

    try {
        SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
    } catch (Exception parseFailure) {
        Assert.fail("Failed to parse records from the given XML Data. Reason: " + parseFailure.getMessage(),
                parseFailure);
    }

    // Record-level expectations.
    Assert.assertNotNull(project.rows, "Checks the record count of project");
    final int recordCount = project.rows.size();
    Assert.assertEquals(recordCount, 3, "Checks the number of records parsed from Xml");

    // Cell-level expectations on the first record.
    final Row firstRow = project.rows.get(0);
    Assert.assertNotNull(firstRow, "Checks the row instance with index '0'");
    final int cellCount = firstRow.cells.size();
    Assert.assertEquals(cellCount, 4, "Checks the row cells count");

    Assert.assertNotNull(firstRow.getCell(1), "Checks the cell instance at index '1'");
    final String authorNameSlice = firstRow.getCell(1).value.toString().substring(2, 9);
    Assert.assertEquals(authorNameSlice, "author1", "Checks the value for first item");

    Assert.assertNotNull(firstRow.getCell(2), "Checks the cell instance at index '2'");
    final String authorDobSlice = firstRow.getCell(2).value.toString().substring(2, 8);
    Assert.assertEquals(authorDobSlice, "a date", "Checks the value for 'author-dob'");
}
/**
 * Parses {@code xml-sample-format-4.xml} and verifies the resulting records:
 * fifty rows, fourteen cells in the first row, and the values "11" and "50"
 * in cells 1 and 2.
 */
@Test
public void processRecordsFromComplexXmlStructure() throws IOException {
    final String xmlContent = _getXmlDataFromFile("xml-sample-format-4.xml");
    loadData(xmlContent);
    createXmlParser();
    ParserSkip();

    try {
        SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
    } catch (Exception parseFailure) {
        Assert.fail("Failed to parse records from the given XML Data. Reason: " + parseFailure.getMessage(),
                parseFailure);
    }

    // Record-level expectations.
    Assert.assertNotNull(project.rows, "Checks the record count of project");
    final int recordCount = project.rows.size();
    Assert.assertEquals(recordCount, 50, "Checks the number of records parsed from Xml");

    // Cell-level expectations on the first record.
    final Row firstRow = project.rows.get(0);
    Assert.assertNotNull(firstRow, "Checks the row instance with index '0'");
    final int cellCount = firstRow.cells.size();
    Assert.assertEquals(cellCount, 14, "Checks the row cells count");

    Assert.assertNotNull(firstRow.getCell(1), "Checks the cell instance at index '1'");
    Assert.assertEquals(firstRow.getCell(1).value, "11", "Checks the value for 'pages'");

    Assert.assertNotNull(firstRow.getCell(2), "Checks the cell instance at index '2'");
    Assert.assertEquals(firstRow.getCell(2).value, "50", "Checks the value for 'per-page'");
}
//----------------helpers-------------
public void loadSampleXml(){
loadData( XmlImporterTests.getSample() );
@ -594,4 +685,11 @@ public class XmlImportUtilitiesTests extends RefineTest {
parser = new JSONTreeReader(inputStream);
return parser;
}
/**
 * Reads a classpath resource fully into a string, decoding it as UTF-8.
 *
 * @param fileName name of the resource, resolved through this class's class loader
 * @return the complete content of the resource
 * @throws IOException if the resource is not found on the classpath or cannot be read
 */
private String _getXmlDataFromFile(String fileName) throws IOException {
    // try-with-resources closes the stream even when reading fails; a null resource is simply not closed.
    try (InputStream in = this.getClass().getClassLoader().getResourceAsStream(fileName)) {
        if (in == null) {
            // getResourceAsStream signals "not found" with null; fail with a clear message
            // instead of letting a NullPointerException surface from the read below.
            throw new java.io.FileNotFoundException("Test resource not found on classpath: " + fileName);
        }
        return org.apache.commons.io.IOUtils.toString(in, "UTF-8");
    }
}
}