Fix: Prevent empty, whitespace-only cells from being added when importing XML data, with tests #1095 (#3357)
* Fix: Prevent empty, whitespace-only cells from being added when importing XML data, with tests (issue #1095)
* Chore: Use 'CharMatcher' to match whitespace instead of a custom regex (issue #1095)
This commit is contained in:
parent
6edfda79a3
commit
4f97fd55a5
@ -46,6 +46,7 @@ import javax.xml.stream.XMLStreamConstants;
|
|||||||
import javax.xml.stream.XMLStreamException;
|
import javax.xml.stream.XMLStreamException;
|
||||||
import javax.xml.stream.XMLStreamReader;
|
import javax.xml.stream.XMLStreamReader;
|
||||||
|
|
||||||
|
import com.google.common.base.CharMatcher;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -209,6 +210,7 @@ public class XmlImporter extends TreeImportingParserBase {
|
|||||||
|
|
||||||
static public class XmlParser implements TreeReader {
|
static public class XmlParser implements TreeReader {
|
||||||
final protected XMLStreamReader parser;
|
final protected XMLStreamReader parser;
|
||||||
|
// Synthetic token code assigned to CHARACTERS events whose text consists only of whitespace.
// NOTE(review): 15 is also the value of XMLStreamConstants.ENTITY_DECLARATION in the StAX API;
// confirm this overlap is harmless or pick a value outside the StAX event-code range.
static final int WHITESPACE_CHARACTERS_TOKEN = 15;
|
||||||
|
|
||||||
public XmlParser(InputStream inputStream) throws XMLStreamException, IOException {
|
public XmlParser(InputStream inputStream) throws XMLStreamException, IOException {
|
||||||
parser = createXMLStreamReader(inputStream);
|
parser = createXMLStreamReader(inputStream);
|
||||||
@ -230,7 +232,15 @@ public class XmlImporter extends TreeImportingParserBase {
|
|||||||
} catch (XMLStreamException e) {
|
} catch (XMLStreamException e) {
|
||||||
throw new TreeReaderException(e);
|
throw new TreeReaderException(e);
|
||||||
}
|
}
|
||||||
|
// Issue #1095: prevent whitespace-only text from being added to the table as empty cells
|
||||||
|
// Whitespace between tags is reported by the parser as CHARACTERS events by default
|
||||||
|
// Reclassify the token when the text is non-empty and consists entirely of whitespace
|
||||||
|
if (currentToken == XMLStreamConstants.CHARACTERS) {
|
||||||
|
String text = parser.getText();
|
||||||
|
if (!text.isEmpty() && CharMatcher.whitespace().matchesAllOf(text)) {
|
||||||
|
currentToken = WHITESPACE_CHARACTERS_TOKEN;
|
||||||
|
}
|
||||||
|
}
|
||||||
return mapToToken(currentToken);
|
return mapToToken(currentToken);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -250,6 +260,7 @@ public class XmlImporter extends TreeImportingParserBase {
|
|||||||
case XMLStreamConstants.COMMENT: return Token.Ignorable;
|
case XMLStreamConstants.COMMENT: return Token.Ignorable;
|
||||||
case XMLStreamConstants.CDATA: return Token.Ignorable;
|
case XMLStreamConstants.CDATA: return Token.Ignorable;
|
||||||
case XMLStreamConstants.ATTRIBUTE: return Token.Ignorable;
|
case XMLStreamConstants.ATTRIBUTE: return Token.Ignorable;
|
||||||
|
case WHITESPACE_CHARACTERS_TOKEN: return Token.Ignorable;
|
||||||
default:
|
default:
|
||||||
return Token.Ignorable;
|
return Token.Ignorable;
|
||||||
}
|
}
|
||||||
|
24
main/tests/data/xml-sample-format-1.xml
Normal file
24
main/tests/data/xml-sample-format-1.xml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
<?xml version = "1.0"?>
|
||||||
|
<library>
|
||||||
|
<book id="1">
|
||||||
|
<author>
|
||||||
|
<author-name>author1</author-name>
|
||||||
|
<author-dob>a date</author-dob>
|
||||||
|
</author>
|
||||||
|
<genre>genre1</genre>
|
||||||
|
</book>
|
||||||
|
<book id="2">
|
||||||
|
<author>
|
||||||
|
<author-name>author2</author-name>
|
||||||
|
<author-dob>a date2</author-dob>
|
||||||
|
</author>
|
||||||
|
<genre>genre2</genre>
|
||||||
|
</book>
|
||||||
|
<book id="3">
|
||||||
|
<author>
|
||||||
|
<author-name>author3</author-name>
|
||||||
|
<author-dob>a date3</author-dob>
|
||||||
|
</author>
|
||||||
|
<genre>genre3</genre>
|
||||||
|
</book>
|
||||||
|
</library>
|
24
main/tests/data/xml-sample-format-2.xml
Normal file
24
main/tests/data/xml-sample-format-2.xml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
<?xml version = "1.0"?>
|
||||||
|
<library >
|
||||||
|
<book id = "1">
|
||||||
|
<author >
|
||||||
|
<author-name >author1</author-name >
|
||||||
|
<author-dob >a date</author-dob >
|
||||||
|
</author >
|
||||||
|
<genre >genre1</genre >
|
||||||
|
</book >
|
||||||
|
<book id = "2">
|
||||||
|
<author >
|
||||||
|
<author-name >author2</author-name >
|
||||||
|
<author-dob >a date2</author-dob >
|
||||||
|
</author >
|
||||||
|
<genre >genre2</genre >
|
||||||
|
</book >
|
||||||
|
<book id = "3">
|
||||||
|
<author >
|
||||||
|
<author-name >author3</author-name >
|
||||||
|
<author-dob >a date3</author-dob >
|
||||||
|
</author >
|
||||||
|
<genre >genre3</genre >
|
||||||
|
</book >
|
||||||
|
</library >
|
24
main/tests/data/xml-sample-format-3.xml
Normal file
24
main/tests/data/xml-sample-format-3.xml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
<?xml version = "1.0"?>
|
||||||
|
<library>
|
||||||
|
<book id=" 1 ">
|
||||||
|
<author>
|
||||||
|
<author-name> author1 </author-name>
|
||||||
|
<author-dob> a date </author-dob>
|
||||||
|
</author>
|
||||||
|
<genre> genre1 </genre>
|
||||||
|
</book>
|
||||||
|
<book id=" 2 ">
|
||||||
|
<author>
|
||||||
|
<author-name> author2 </author-name>
|
||||||
|
<author-dob> a date2 </author-dob>
|
||||||
|
</author>
|
||||||
|
<genre> genre2 </genre>
|
||||||
|
</book>
|
||||||
|
<book id=" 3 ">
|
||||||
|
<author>
|
||||||
|
<author-name> author3 </author-name>
|
||||||
|
<author-dob> a date3 </author-dob>
|
||||||
|
</author>
|
||||||
|
<genre> genre3 </genre>
|
||||||
|
</book>
|
||||||
|
</library>
|
503
main/tests/data/xml-sample-format-4.xml
Normal file
503
main/tests/data/xml-sample-format-4.xml
Normal file
@ -0,0 +1,503 @@
|
|||||||
|
<wb:data xmlns:wb="http://www.worldbank.org" page="1" pages="11" per_page="50" total="528" sourceid="2" lastupdated="2020-10-15">
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="1A">Arab World</wb:country>
|
||||||
|
<wb:countryiso3code>ARB</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>288432163</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="1A">Arab World</wb:country>
|
||||||
|
<wb:countryiso3code>ARB</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>282344154</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="S3">Caribbean small states</wb:country>
|
||||||
|
<wb:countryiso3code>CSS</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>6559096</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="S3">Caribbean small states</wb:country>
|
||||||
|
<wb:countryiso3code>CSS</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>6513485</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="B8">Central Europe and the Baltics</wb:country>
|
||||||
|
<wb:countryiso3code>CEB</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>107660041</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="B8">Central Europe and the Baltics</wb:country>
|
||||||
|
<wb:countryiso3code>CEB</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>108447824</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="V2">Early-demographic dividend</wb:country>
|
||||||
|
<wb:countryiso3code>EAR</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>2516662236</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="V2">Early-demographic dividend</wb:country>
|
||||||
|
<wb:countryiso3code>EAR</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>2472852823</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="Z4">East Asia and Pacific</wb:country>
|
||||||
|
<wb:countryiso3code>EAS</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>2065912076</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="Z4">East Asia and Pacific</wb:country>
|
||||||
|
<wb:countryiso3code>EAS</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>2047640119</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="4E">East Asia and Pacific (excluding high income)</wb:country>
|
||||||
|
<wb:countryiso3code>EAP</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>1833423014</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="4E">East Asia and Pacific (excluding high income)</wb:country>
|
||||||
|
<wb:countryiso3code>EAP</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>1816455805</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="T4">East Asia and Pacific (IDA and IBRD countries)</wb:country>
|
||||||
|
<wb:countryiso3code>TEA</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>1810261141</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="T4">East Asia and Pacific (IDA and IBRD countries)</wb:country>
|
||||||
|
<wb:countryiso3code>TEA</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>1793498351</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XC">Euro area</wb:country>
|
||||||
|
<wb:countryiso3code>EMU</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>322547874</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XC">Euro area</wb:country>
|
||||||
|
<wb:countryiso3code>EMU</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>321310791</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="Z7">Europe and Central Asia</wb:country>
|
||||||
|
<wb:countryiso3code>ECS</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>862347940</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="Z7">Europe and Central Asia</wb:country>
|
||||||
|
<wb:countryiso3code>ECS</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>861278548</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="7E">Europe and Central Asia (excluding high income)</wb:country>
|
||||||
|
<wb:countryiso3code>ECA</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>369183312</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="7E">Europe and Central Asia (excluding high income)</wb:country>
|
||||||
|
<wb:countryiso3code>ECA</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>369143668</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="T7">Europe and Central Asia (IDA and IBRD countries)</wb:country>
|
||||||
|
<wb:countryiso3code>TEC</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>433863000</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="T7">Europe and Central Asia (IDA and IBRD countries)</wb:country>
|
||||||
|
<wb:countryiso3code>TEC</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>434313570</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="EU">European Union</wb:country>
|
||||||
|
<wb:countryiso3code>EUU</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>429895628</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="EU">European Union</wb:country>
|
||||||
|
<wb:countryiso3code>EUU</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>429328624</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="F1">Fragile and conflict affected situations</wb:country>
|
||||||
|
<wb:countryiso3code>FCS</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>517162716</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="F1">Fragile and conflict affected situations</wb:country>
|
||||||
|
<wb:countryiso3code>FCS</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>504450718</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XE">Heavily indebted poor countries (HIPC)</wb:country>
|
||||||
|
<wb:countryiso3code>HPC</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>485112686</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XE">Heavily indebted poor countries (HIPC)</wb:country>
|
||||||
|
<wb:countryiso3code>HPC</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>471680794</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XD">High income</wb:country>
|
||||||
|
<wb:countryiso3code/>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>1108227429</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XD">High income</wb:country>
|
||||||
|
<wb:countryiso3code/>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>1101479757</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XF">IBRD only</wb:country>
|
||||||
|
<wb:countryiso3code>IBD</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>4032822516</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XF">IBRD only</wb:country>
|
||||||
|
<wb:countryiso3code>IBD</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>3987195304</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="ZT">IDA and IBRD total</wb:country>
|
||||||
|
<wb:countryiso3code>IBT</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>5137401888</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="ZT">IDA and IBRD total</wb:country>
|
||||||
|
<wb:countryiso3code>IBT</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>5065364308</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XH">IDA blend</wb:country>
|
||||||
|
<wb:countryiso3code>IDB</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>368820681</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XH">IDA blend</wb:country>
|
||||||
|
<wb:countryiso3code>IDB</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>360173360</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XI">IDA only</wb:country>
|
||||||
|
<wb:countryiso3code>IDX</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>735758691</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XI">IDA only</wb:country>
|
||||||
|
<wb:countryiso3code>IDX</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>717995644</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XG">IDA total</wb:country>
|
||||||
|
<wb:countryiso3code>IDA</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>1104579372</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XG">IDA total</wb:country>
|
||||||
|
<wb:countryiso3code>IDA</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>1078169004</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="V3">Late-demographic dividend</wb:country>
|
||||||
|
<wb:countryiso3code>LTE</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>2059873511</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="V3">Late-demographic dividend</wb:country>
|
||||||
|
<wb:countryiso3code>LTE</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>2045125926</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="ZJ">Latin America and Caribbean</wb:country>
|
||||||
|
<wb:countryiso3code>LCN</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>528283173</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="ZJ">Latin America and Caribbean</wb:country>
|
||||||
|
<wb:countryiso3code>LCN</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>520903449</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XJ">Latin America and Caribbean (excluding high income)</wb:country>
|
||||||
|
<wb:countryiso3code>LAC</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>500087474</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XJ">Latin America and Caribbean (excluding high income)</wb:country>
|
||||||
|
<wb:countryiso3code>LAC</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>492968031</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="T2">Latin America and the Caribbean (IDA and IBRD countries)</wb:country>
|
||||||
|
<wb:countryiso3code>TLA</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>512247484</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="T2">Latin America and the Caribbean (IDA and IBRD countries)</wb:country>
|
||||||
|
<wb:countryiso3code>TLA</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>504921261</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XL">Least developed countries: UN classification</wb:country>
|
||||||
|
<wb:countryiso3code>LDC</wb:countryiso3code>
|
||||||
|
<wb:date>2001</wb:date>
|
||||||
|
<wb:value>673903112</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
<wb:data>
|
||||||
|
<wb:indicator id="SP.POP.TOTL">Population, total</wb:indicator>
|
||||||
|
<wb:country id="XL">Least developed countries: UN classification</wb:country>
|
||||||
|
<wb:countryiso3code>LDC</wb:countryiso3code>
|
||||||
|
<wb:date>2000</wb:date>
|
||||||
|
<wb:value>657215864</wb:value>
|
||||||
|
<wb:unit/>
|
||||||
|
<wb:obs_status/>
|
||||||
|
<wb:decimal>0</wb:decimal>
|
||||||
|
</wb:data>
|
||||||
|
</wb:data>
|
@ -35,6 +35,7 @@ package com.google.refine.importers;
|
|||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -553,6 +554,96 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates the output records data with Input as Xml containing whitespaces
|
||||||
|
* <p>
|
||||||
|
* Fix: Issue#1095 :: Open XML file from URL generates lots of empty lines
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void processRecordsFromXmlWithWhiteSpacesBeforeTagsTest() throws IOException {
|
||||||
|
loadData(_getXmlDataFromFile("xml-sample-format-1.xml"));
|
||||||
|
createXmlParser();
|
||||||
|
ParserSkip();
|
||||||
|
try {
|
||||||
|
SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail("Failed to parse records from the given XML Data. Reason: " + e.getMessage(), e);
|
||||||
|
}
|
||||||
|
Assert.assertNotNull(project.rows, "Checks the record count of project");
|
||||||
|
Assert.assertEquals(project.rows.size(), 3, "Checks the number of records parsed from Xml");
|
||||||
|
Row row = project.rows.get(0);
|
||||||
|
Assert.assertNotNull(row, "Checks the row instance with index '0'");
|
||||||
|
Assert.assertEquals(row.cells.size(), 4, "Checks the row cells count");
|
||||||
|
Assert.assertNotNull(row.getCell(1), "Checks the cell instance at index '1'");
|
||||||
|
Assert.assertEquals(row.getCell(1).value, "author1", "Checks the value for 'author-name'");
|
||||||
|
Assert.assertNotNull(row.getCell(2), "Checks the cell instance at index '2'");
|
||||||
|
Assert.assertEquals(row.getCell(2).value, "a date", "Checks the value for 'author-dob'");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void processRecordsFromComplexXmlWithTagsHavingWhitespaces() throws IOException {
|
||||||
|
loadData(_getXmlDataFromFile("xml-sample-format-2.xml"));
|
||||||
|
createXmlParser();
|
||||||
|
ParserSkip();
|
||||||
|
try {
|
||||||
|
SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail("Failed to parse records from the given XML Data. Reason: " + e.getMessage(), e);
|
||||||
|
}
|
||||||
|
Assert.assertNotNull(project.rows, "Checks the record count of project");
|
||||||
|
Assert.assertEquals(project.rows.size(), 3, "Checks the number of records parsed from Xml");
|
||||||
|
Row row = project.rows.get(0);
|
||||||
|
Assert.assertNotNull(row, "Checks the row instance with index '0'");
|
||||||
|
Assert.assertEquals(row.cells.size(), 4, "Checks the row cells count");
|
||||||
|
Assert.assertNotNull(row.getCell(1), "Checks the cell instance at index '1'");
|
||||||
|
Assert.assertEquals(row.getCell(1).value, "author1", "Checks the value for first item");
|
||||||
|
Assert.assertNotNull(row.getCell(2), "Checks the cell instance at index '2'");
|
||||||
|
Assert.assertEquals(row.getCell(2).value, "a date", "Checks the value for 'author-dob'");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void processRecordsFromXMLWithDataHavingWhitespaces() throws IOException {
|
||||||
|
loadData(_getXmlDataFromFile("xml-sample-format-3.xml"));
|
||||||
|
createXmlParser();
|
||||||
|
ParserSkip();
|
||||||
|
try {
|
||||||
|
SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail("Failed to parse records from the given XML Data. Reason: " + e.getMessage(), e);
|
||||||
|
}
|
||||||
|
Assert.assertNotNull(project.rows, "Checks the record count of project");
|
||||||
|
Assert.assertEquals(project.rows.size(), 3, "Checks the number of records parsed from Xml");
|
||||||
|
Row row = project.rows.get(0);
|
||||||
|
Assert.assertNotNull(row, "Checks the row instance with index '0'");
|
||||||
|
Assert.assertEquals(row.cells.size(), 4, "Checks the row cells count");
|
||||||
|
Assert.assertNotNull(row.getCell(1), "Checks the cell instance at index '1'");
|
||||||
|
Assert.assertEquals(row.getCell(1).value.toString().substring(2, 9), "author1", "Checks the value for first item");
|
||||||
|
Assert.assertNotNull(row.getCell(2), "Checks the cell instance at index '2'");
|
||||||
|
Assert.assertEquals(row.getCell(2).value.toString().substring(2, 8), "a date", "Checks the value for 'author-dob'");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void processRecordsFromComplexXmlStructure() throws IOException {
|
||||||
|
loadData(_getXmlDataFromFile("xml-sample-format-4.xml"));
|
||||||
|
createXmlParser();
|
||||||
|
ParserSkip();
|
||||||
|
try {
|
||||||
|
SUT.processRecordWrapper(project, parser, columnGroup, false, false, false);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail("Failed to parse records from the given XML Data. Reason: " + e.getMessage(), e);
|
||||||
|
}
|
||||||
|
Assert.assertNotNull(project.rows, "Checks the record count of project");
|
||||||
|
Assert.assertEquals(project.rows.size(), 50, "Checks the number of records parsed from Xml");
|
||||||
|
Row row = project.rows.get(0);
|
||||||
|
Assert.assertNotNull(row, "Checks the row instance with index '0'");
|
||||||
|
Assert.assertEquals(row.cells.size(), 14, "Checks the row cells count");
|
||||||
|
Assert.assertNotNull(row.getCell(1), "Checks the cell instance at index '1'");
|
||||||
|
Assert.assertEquals(row.getCell(1).value, "11", "Checks the value for 'pages'");
|
||||||
|
Assert.assertNotNull(row.getCell(2), "Checks the cell instance at index '2'");
|
||||||
|
Assert.assertEquals(row.getCell(2).value, "50", "Checks the value for 'per-page'");
|
||||||
|
}
|
||||||
|
|
||||||
//----------------helpers-------------
|
//----------------helpers-------------
|
||||||
public void loadSampleXml(){
|
public void loadSampleXml(){
|
||||||
loadData( XmlImporterTests.getSample() );
|
loadData( XmlImporterTests.getSample() );
|
||||||
@ -594,4 +685,11 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
parser = new JSONTreeReader(inputStream);
|
parser = new JSONTreeReader(inputStream);
|
||||||
return parser;
|
return parser;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String _getXmlDataFromFile(String fileName) throws IOException {
|
||||||
|
InputStream in = this.getClass().getClassLoader()
|
||||||
|
.getResourceAsStream(fileName);
|
||||||
|
String content = org.apache.commons.io.IOUtils.toString(in, "UTF-8");
|
||||||
|
return content;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user