Fix: Prevent whitespace-only text from creating empty cells when importing XML data, with tests - issue #1095 (#3357)
* Fix: Prevent whitespace-only text from creating empty cells when importing XML data, with tests (issue #1095) * Chore: Use 'CharMatcher' to match whitespace instead of a custom regex (issue #1095)
This commit is contained in:
parent
6edfda79a3
commit
4f97fd55a5
@ -46,6 +46,7 @@ import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
|
||||
import com.google.common.base.CharMatcher;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -209,7 +210,8 @@ public class XmlImporter extends TreeImportingParserBase {
|
||||
|
||||
static public class XmlParser implements TreeReader {
|
||||
final protected XMLStreamReader parser;
|
||||
|
||||
static final int WHITESPACE_CHARACTERS_TOKEN = 15;
|
||||
|
||||
public XmlParser(InputStream inputStream) throws XMLStreamException, IOException {
|
||||
parser = createXMLStreamReader(inputStream);
|
||||
}
|
||||
@ -230,7 +232,15 @@ public class XmlImporter extends TreeImportingParserBase {
|
||||
} catch (XMLStreamException e) {
|
||||
throw new TreeReaderException(e);
|
||||
}
|
||||
|
||||
// Issue #1095 : Preventing addition of empty cells containing whitespaces in the table
|
||||
// Whitespace between tags is reported by the parser as a CHARACTERS event by default
|
||||
// Re-tags the token as whitespace when the text consists solely of whitespace characters
|
||||
if (currentToken == XMLStreamConstants.CHARACTERS) {
|
||||
String text = parser.getText();
|
||||
if (!text.isEmpty() && CharMatcher.whitespace().matchesAllOf(text)) {
|
||||
currentToken = WHITESPACE_CHARACTERS_TOKEN;
|
||||
}
|
||||
}
|
||||
return mapToToken(currentToken);
|
||||
}
|
||||
|
||||
@ -250,6 +260,7 @@ public class XmlImporter extends TreeImportingParserBase {
|
||||
case XMLStreamConstants.COMMENT: return Token.Ignorable;
|
||||
case XMLStreamConstants.CDATA: return Token.Ignorable;
|
||||
case XMLStreamConstants.ATTRIBUTE: return Token.Ignorable;
|
||||
case WHITESPACE_CHARACTERS_TOKEN: return Token.Ignorable;
|
||||
default:
|
||||
return Token.Ignorable;
|
||||
}
|
||||
|
24
main/tests/data/xml-sample-format-1.xml
Normal file
24
main/tests/data/xml-sample-format-1.xml
Normal file
@ -0,0 +1,24 @@
|
||||
<?xml version = "1.0"?>
|
||||
<library>
|
||||
<book id="1">
|
||||
<author>
|
||||
<author-name>author1</author-name>
|
||||
<author-dob>a date</author-dob>
|
||||
</author>
|
||||
<genre>genre1</genre>
|
||||
</book>
|
||||
<book id="2">
|
||||
<author>
|
||||
<author-name>author2</author-name>
|
||||
<author-dob>a date2</author-dob>
|
||||
</author>
|
||||
<genre>genre2</genre>
|
||||
</book>
|
||||
<book id="3">
|
||||
<author>
|
||||
<author-name>author3</author-name>
|
||||
<author-dob>a date3</author-dob>
|
||||
</author>
|
||||
<genre>genre3</genre>
|
||||
</book>
|
||||
</library>
|
24
main/tests/data/xml-sample-format-2.xml
Normal file
24
main/tests/data/xml-sample-format-2.xml
Normal file
@ -0,0 +1,24 @@
|
||||
<?xml version = "1.0"?>
|
||||
<library >
|
||||
<book id = "1">
|
||||
<author >
|
||||
<author-name >author1</author-name >
|
||||
<author-dob >a date</author-dob >
|
||||
</author >
|
||||
<genre >genre1</genre >
|
||||
</book >
|
||||
<book id = "2">
|
||||
<author >
|
||||
<author-name >author2</author-name >
|
||||
<author-dob >a date2</author-dob >
|
||||
</author >
|
||||
<genre >genre2</genre >
|
||||
</book >
|
||||
<book id = "3">
|
||||
<author >
|
||||
<author-name >author3</author-name >
|
||||
<author-dob >a date3</author-dob >
|
||||
</author >
|
||||
<genre >genre3</genre >
|
||||
</book >
|
||||
</library >
|
24
main/tests/data/xml-sample-format-3.xml
Normal file
24
main/tests/data/xml-sample-format-3.xml
Normal file
@ -0,0 +1,24 @@
|
||||
<?xml version = "1.0"?>
|
||||
<library>
|
||||
<book id=" 1 ">
|
||||
<author>
|
||||
<author-name> author1 </author-name>
|
||||
<author-dob> a date </author-dob>
|
||||
</author>
|
||||
<genre> genre1 </genre>
|
||||
</book>
|
||||
<book id=" 2 ">
|
||||
<author>
|
||||
<author-name> author2 </author-name>
|
||||
<author-dob> a date2 </author-dob>
|
||||
</author>
|
||||
<genre> genre2 </genre>
|
||||
</book>
|
||||
<book id=" 3 ">
|
||||
<author>
|
||||
<author-name> author3 </author-name>
|
||||
<author-dob> a date3 </author-dob>
|
||||
</author>
|
||||
<genre> genre3 </genre>
|
||||
</book>
|
||||
</library>
|
503
main/tests/data/xml-sample-format-4.xml
Normal file
503
main/tests/data/xml-sample-format-4.xml
Normal file
@ -0,0 +1,503 @@
|
||||
<wb:data xmlns:wb="http://www.worldbank.org" page="1" pages="11" per_page="50" total="528" sourceid="2" lastupdated="2020-10-15">
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="1A">Arab World</wb:country>
|
||||
<wb:countryiso3code>ARB</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>288432163</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="1A">Arab World</wb:country>
|
||||
<wb:countryiso3code>ARB</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>282344154</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="S3">Caribbean small states</wb:country>
|
||||
<wb:countryiso3code>CSS</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>6559096</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="S3">Caribbean small states</wb:country>
|
||||
<wb:countryiso3code>CSS</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>6513485</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="B8">Central Europe and the Baltics</wb:country>
|
||||
<wb:countryiso3code>CEB</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>107660041</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="B8">Central Europe and the Baltics</wb:country>
|
||||
<wb:countryiso3code>CEB</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>108447824</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="V2">Early-demographic dividend</wb:country>
|
||||
<wb:countryiso3code>EAR</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>2516662236</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="V2">Early-demographic dividend</wb:country>
|
||||
<wb:countryiso3code>EAR</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>2472852823</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="Z4">East Asia and Pacific</wb:country>
|
||||
<wb:countryiso3code>EAS</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>2065912076</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="Z4">East Asia and Pacific</wb:country>
|
||||
<wb:countryiso3code>EAS</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>2047640119</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="4E">East Asia and Pacific (excluding high income)</wb:country>
|
||||
<wb:countryiso3code>EAP</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>1833423014</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="4E">East Asia and Pacific (excluding high income)</wb:country>
|
||||
<wb:countryiso3code>EAP</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>1816455805</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="T4">East Asia and Pacific (IDA and IBRD countries)</wb:country>
|
||||
<wb:countryiso3code>TEA</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>1810261141</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="T4">East Asia and Pacific (IDA and IBRD countries)</wb:country>
|
||||
<wb:countryiso3code>TEA</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>1793498351</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XC">Euro area</wb:country>
|
||||
<wb:countryiso3code>EMU</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>322547874</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XC">Euro area</wb:country>
|
||||
<wb:countryiso3code>EMU</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>321310791</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="Z7">Europe and Central Asia</wb:country>
|
||||
<wb:countryiso3code>ECS</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>862347940</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="Z7">Europe and Central Asia</wb:country>
|
||||
<wb:countryiso3code>ECS</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>861278548</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="7E">Europe and Central Asia (excluding high income)</wb:country>
|
||||
<wb:countryiso3code>ECA</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>369183312</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="7E">Europe and Central Asia (excluding high income)</wb:country>
|
||||
<wb:countryiso3code>ECA</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>369143668</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="T7">Europe and Central Asia (IDA and IBRD countries)</wb:country>
|
||||
<wb:countryiso3code>TEC</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>433863000</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="T7">Europe and Central Asia (IDA and IBRD countries)</wb:country>
|
||||
<wb:countryiso3code>TEC</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>434313570</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="EU">European Union</wb:country>
|
||||
<wb:countryiso3code>EUU</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>429895628</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="EU">European Union</wb:country>
|
||||
<wb:countryiso3code>EUU</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>429328624</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="F1">Fragile and conflict affected situations</wb:country>
|
||||
<wb:countryiso3code>FCS</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>517162716</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="F1">Fragile and conflict affected situations</wb:country>
|
||||
<wb:countryiso3code>FCS</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>504450718</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XE">Heavily indebted poor countries (HIPC)</wb:country>
|
||||
<wb:countryiso3code>HPC</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>485112686</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XE">Heavily indebted poor countries (HIPC)</wb:country>
|
||||
<wb:countryiso3code>HPC</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>471680794</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XD">High income</wb:country>
|
||||
<wb:countryiso3code/>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>1108227429</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XD">High income</wb:country>
|
||||
<wb:countryiso3code/>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>1101479757</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XF">IBRD only</wb:country>
|
||||
<wb:countryiso3code>IBD</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>4032822516</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XF">IBRD only</wb:country>
|
||||
<wb:countryiso3code>IBD</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>3987195304</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="ZT">IDA and IBRD total</wb:country>
|
||||
<wb:countryiso3code>IBT</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>5137401888</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="ZT">IDA and IBRD total</wb:country>
|
||||
<wb:countryiso3code>IBT</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>5065364308</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XH">IDA blend</wb:country>
|
||||
<wb:countryiso3code>IDB</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>368820681</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XH">IDA blend</wb:country>
|
||||
<wb:countryiso3code>IDB</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>360173360</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XI">IDA only</wb:country>
|
||||
<wb:countryiso3code>IDX</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>735758691</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XI">IDA only</wb:country>
|
||||
<wb:countryiso3code>IDX</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>717995644</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XG">IDA total</wb:country>
|
||||
<wb:countryiso3code>IDA</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>1104579372</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XG">IDA total</wb:country>
|
||||
<wb:countryiso3code>IDA</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>1078169004</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="V3">Late-demographic dividend</wb:country>
|
||||
<wb:countryiso3code>LTE</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>2059873511</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="V3">Late-demographic dividend</wb:country>
|
||||
<wb:countryiso3code>LTE</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>2045125926</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="ZJ">Latin America and Caribbean</wb:country>
|
||||
<wb:countryiso3code>LCN</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>528283173</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="ZJ">Latin America and Caribbean</wb:country>
|
||||
<wb:countryiso3code>LCN</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>520903449</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XJ">Latin America and Caribbean (excluding high income)</wb:country>
|
||||
<wb:countryiso3code>LAC</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>500087474</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XJ">Latin America and Caribbean (excluding high income)</wb:country>
|
||||
<wb:countryiso3code>LAC</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>492968031</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="T2">Latin America and the Caribbean (IDA and IBRD countries)</wb:country>
|
||||
<wb:countryiso3code>TLA</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>512247484</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="T2">Latin America and the Caribbean (IDA and IBRD countries)</wb:country>
|
||||
<wb:countryiso3code>TLA</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>504921261</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XL">Least developed countries: UN classification</wb:country>
|
||||
<wb:countryiso3code>LDC</wb:countryiso3code>
|
||||
<wb:date>2001</wb:date>
|
||||
<wb:value>673903112</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
<wb:data>
|
||||
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||
<wb:country id="XL">Least developed countries: UN classification</wb:country>
|
||||
<wb:countryiso3code>LDC</wb:countryiso3code>
|
||||
<wb:date>2000</wb:date>
|
||||
<wb:value>657215864</wb:value>
|
||||
<wb:unit/>
|
||||
<wb:obs_status/>
|
||||
<wb:decimal>0</wb:decimal>
|
||||
</wb:data>
|
||||
</wb:data>
|
@ -35,6 +35,7 @@ package com.google.refine.importers;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@ -553,6 +554,96 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates the output records data with Input as Xml containing whitespaces
|
||||
* <p>
|
||||
* Fix: Issue#1095 :: Open XML file from URL generates lots of empty lines
|
||||
*/
|
||||
@Test
|
||||
public void processRecordsFromXmlWithWhiteSpacesBeforeTagsTest() throws IOException {
|
||||
loadData(_getXmlDataFromFile("xml-sample-format-1.xml"));
|
||||
createXmlParser();
|
||||
ParserSkip();
|
||||
try {
|
||||
SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Failed to parse records from the given XML Data. Reason: " + e.getMessage(), e);
|
||||
}
|
||||
Assert.assertNotNull(project.rows, "Checks the record count of project");
|
||||
Assert.assertEquals(project.rows.size(), 3, "Checks the number of records parsed from Xml");
|
||||
Row row = project.rows.get(0);
|
||||
Assert.assertNotNull(row, "Checks the row instance with index '0'");
|
||||
Assert.assertEquals(row.cells.size(), 4, "Checks the row cells count");
|
||||
Assert.assertNotNull(row.getCell(1), "Checks the cell instance at index '1'");
|
||||
Assert.assertEquals(row.getCell(1).value, "author1", "Checks the value for 'author-name'");
|
||||
Assert.assertNotNull(row.getCell(2), "Checks the cell instance at index '2'");
|
||||
Assert.assertEquals(row.getCell(2).value, "a date", "Checks the value for 'author-dob'");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void processRecordsFromComplexXmlWithTagsHavingWhitespaces() throws IOException {
|
||||
loadData(_getXmlDataFromFile("xml-sample-format-2.xml"));
|
||||
createXmlParser();
|
||||
ParserSkip();
|
||||
try {
|
||||
SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Failed to parse records from the given XML Data. Reason: " + e.getMessage(), e);
|
||||
}
|
||||
Assert.assertNotNull(project.rows, "Checks the record count of project");
|
||||
Assert.assertEquals(project.rows.size(), 3, "Checks the number of records parsed from Xml");
|
||||
Row row = project.rows.get(0);
|
||||
Assert.assertNotNull(row, "Checks the row instance with index '0'");
|
||||
Assert.assertEquals(row.cells.size(), 4, "Checks the row cells count");
|
||||
Assert.assertNotNull(row.getCell(1), "Checks the cell instance at index '1'");
|
||||
Assert.assertEquals(row.getCell(1).value, "author1", "Checks the value for first item");
|
||||
Assert.assertNotNull(row.getCell(2), "Checks the cell instance at index '2'");
|
||||
Assert.assertEquals(row.getCell(2).value, "a date", "Checks the value for 'author-dob'");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void processRecordsFromXMLWithDataHavingWhitespaces() throws IOException {
|
||||
loadData(_getXmlDataFromFile("xml-sample-format-3.xml"));
|
||||
createXmlParser();
|
||||
ParserSkip();
|
||||
try {
|
||||
SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Failed to parse records from the given XML Data. Reason: " + e.getMessage(), e);
|
||||
}
|
||||
Assert.assertNotNull(project.rows, "Checks the record count of project");
|
||||
Assert.assertEquals(project.rows.size(), 3, "Checks the number of records parsed from Xml");
|
||||
Row row = project.rows.get(0);
|
||||
Assert.assertNotNull(row, "Checks the row instance with index '0'");
|
||||
Assert.assertEquals(row.cells.size(), 4, "Checks the row cells count");
|
||||
Assert.assertNotNull(row.getCell(1), "Checks the cell instance at index '1'");
|
||||
Assert.assertEquals(row.getCell(1).value.toString().substring(2, 9), "author1", "Checks the value for first item");
|
||||
Assert.assertNotNull(row.getCell(2), "Checks the cell instance at index '2'");
|
||||
Assert.assertEquals(row.getCell(2).value.toString().substring(2, 8), "a date", "Checks the value for 'author-dob'");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void processRecordsFromComplexXmlStructure() throws IOException {
|
||||
loadData(_getXmlDataFromFile("xml-sample-format-4.xml"));
|
||||
createXmlParser();
|
||||
ParserSkip();
|
||||
try {
|
||||
SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Failed to parse records from the given XML Data. Reason: " + e.getMessage(), e);
|
||||
}
|
||||
Assert.assertNotNull(project.rows, "Checks the record count of project");
|
||||
Assert.assertEquals(project.rows.size(), 50, "Checks the number of records parsed from Xml");
|
||||
Row row = project.rows.get(0);
|
||||
Assert.assertNotNull(row, "Checks the row instance with index '0'");
|
||||
Assert.assertEquals(row.cells.size(), 14, "Checks the row cells count");
|
||||
Assert.assertNotNull(row.getCell(1), "Checks the cell instance at index '1'");
|
||||
Assert.assertEquals(row.getCell(1).value, "11", "Checks the value for 'pages'");
|
||||
Assert.assertNotNull(row.getCell(2), "Checks the cell instance at index '2'");
|
||||
Assert.assertEquals(row.getCell(2).value, "50", "Checks the value for 'per-page'");
|
||||
}
|
||||
|
||||
//----------------helpers-------------
|
||||
public void loadSampleXml(){
|
||||
loadData( XmlImporterTests.getSample() );
|
||||
@ -594,4 +685,11 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
parser = new JSONTreeReader(inputStream);
|
||||
return parser;
|
||||
}
|
||||
|
||||
private String _getXmlDataFromFile(String fileName) throws IOException {
|
||||
InputStream in = this.getClass().getClassLoader()
|
||||
.getResourceAsStream(fileName);
|
||||
String content = org.apache.commons.io.IOUtils.toString(in, "UTF-8");
|
||||
return content;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user