tao-test/app/taoQtiItem/model/search/QtiItemContentTokenizer.php

103 lines
3.2 KiB
PHP
Raw Normal View History

2022-08-29 20:14:13 +02:00
<?php
/**
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; under version 2
* of the License (non-upgradable).
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright (c) 2014-2021 (original work) Open Assessment Technologies SA (under the project TAO-PRODUCT);
*
*/
namespace oat\taoQtiItem\model\search;
use core_kernel_classes_Resource;
use oat\generis\model\fileReference\FileReferenceSerializer;
use oat\generis\model\OntologyAwareTrait;
use oat\oatbox\service\ConfigurableService;
use oat\tao\model\search\tokenizer\ResourceTokenizer;
use oat\taoQtiItem\model\qti\Service;
use oat\taoQtiItem\model\search\Tokenizer\Filter\TokenFilterInterface;
use taoItems_models_classes_ItemsService;
/**
 * Tokenizer that turns the text content of an item's QTI XML file into a list of strings.
 *
 * The QTI file is located through the item-content property of the resource,
 * parsed as XML, and the text of every text node is collected. Filters configured
 * under OPTION_FILTERS are run on each text node and decide whether it is kept.
 */
class QtiItemContentTokenizer extends ConfigurableService implements ResourceTokenizer
{
    use OntologyAwareTrait;

    public const SERVICE_ID = 'taoQtiItem/QtiItemContentTokenizer';

    /** Option key under which the list of TokenFilterInterface instances is stored. */
    public const OPTION_FILTERS = 'data_filters';

    /**
     * Get tokens as string[] extracted from a QTI file.
     * The XML inside the QTI item file is parsed and the text of every text node is returned.
     *
     * An empty array is returned when the resource has no item content, when the
     * QTI file does not exist or is empty, or when its content is not parseable XML.
     *
     * @param core_kernel_classes_Resource $resource item whose content is tokenized
     *
     * @return string[] trimmed, non-empty text node contents, in document order
     */
    public function getStrings(core_kernel_classes_Resource $resource)
    {
        try {
            $ontologyFiles = $resource->getPropertyValues(
                $this->getProperty(taoItems_models_classes_ItemsService::PROPERTY_ITEM_CONTENT)
            );
        } catch (\core_kernel_classes_EmptyProperty $e) {
            return [];
        }

        if (empty($ontologyFiles)) {
            return [];
        }

        // Only the first item-content value is used to locate the item directory.
        $file = $this->getFileReferenceSerializer()
            ->unserializeDirectory(reset($ontologyFiles))
            ->getFile(Service::QTI_ITEM_FILE);

        if (!$file->exists()) {
            return [];
        }

        $content = $file->read();
        if (empty($content)) {
            return [];
        }

        $dom = new \DOMDocument();
        // loadXML() returns false on malformed XML; stop here explicitly instead of
        // relying on an XPath query over an empty document.
        if ($dom->loadXML($content) === false) {
            return [];
        }

        $textNodes = (new \DOMXPath($dom))->query('//text()');

        $contentStrings = [];
        foreach ($textNodes as $textNode) {
            $text = $textNode->wholeText;

            // The filter result is used as a gate only. Compare strictly against the
            // empty string: a truthiness test would also discard the literal text "0".
            if ($this->applyFilters($text) === '') {
                continue;
            }

            // Whitespace-only nodes (indentation between elements) trim down to an
            // empty string, which carries no token and is therefore skipped.
            $trimmed = trim($text);
            if ($trimmed !== '') {
                $contentStrings[] = $trimmed;
            }
        }

        return $contentStrings;
    }

    /**
     * Fetch the file reference serializer from the service manager.
     */
    protected function getFileReferenceSerializer(): FileReferenceSerializer
    {
        return $this->getServiceManager()->get(FileReferenceSerializer::SERVICE_ID);
    }

    /**
     * Run the configured filters over the given text, each filter receiving the
     * output of the previous one.
     *
     * @param string $data raw text of a text node
     *
     * @return string the filtered text, or $data unchanged when no filter is configured
     */
    protected function applyFilters(string $data): string
    {
        /** @var TokenFilterInterface $filter */
        foreach ($this->getOption(self::OPTION_FILTERS, []) as $filter) {
            // On the first iteration $output is not set yet, so the raw data is used.
            $output = $filter->filter($output ?? $data);
        }

        return $output ?? $data;
    }
}