Current File : /home/k/a/r/karenpetzb/www/items/category/Query.zip |
PK <�H[w-_�
Css2Xpath.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Dom
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Transform CSS selectors to XPath
*
* @package Zend_Dom
* @subpackage Query
* @copyright Copyright (C) 2007 - Present, Zend Technologies, Inc.
* @license New BSD {@link http://framework.zend.com/license/new-bsd}
* @version $Id: Css2Xpath.php 11013 2008-08-24 21:06:20Z thomas $
*/
class Zend_Dom_Query_Css2Xpath
{
    /**
     * Transform CSS expression to XPath
     *
     * A comma-separated selector list is split and every selector is
     * transformed on its own; in that case an array of XPath expressions is
     * returned. A single selector is split on whitespace into descendant
     * segments and returned as one XPath string; when a class selector appears
     * after the first segment, two alternatives are produced per path (class
     * on any descendant, or class on the previous element itself) and joined
     * with ' | '.
     *
     * @param  string $path CSS selector (or comma-separated selector list)
     * @return string|array XPath string, or array of XPath strings for a selector list
     */
    public static function transform($path)
    {
        $path = (string) $path;
        if (strstr($path, ',')) {
            $paths       = explode(',', $path);
            $expressions = array();
            foreach ($paths as $path) {
                $xpath = self::transform(trim($path));
                if (is_string($xpath)) {
                    $expressions[] = $xpath;
                } elseif (is_array($xpath)) {
                    $expressions = array_merge($expressions, $xpath);
                }
            }
            return $expressions;
        }

        $paths    = array('//');
        $segments = preg_split('/\s+/', $path);
        foreach ($segments as $key => $segment) {
            $pathSegment    = self::_tokenize($segment);
            $isClassSegment = (0 === strpos($pathSegment, '[contains(@class'));

            if (0 == $key) {
                // A bare class selector needs an element wildcard in front of the predicate
                $paths[0] .= $isClassSegment ? '*' . $pathSegment : $pathSegment;
                continue;
            }

            if ($isClassSegment) {
                // foreach iterates over a snapshot, so alternatives appended
                // here are not visited again during this pass
                foreach ($paths as $index => $xpath) {
                    $paths[$index] .= '//*' . $pathSegment;
                    $paths[]        = $xpath . $pathSegment;
                }
            } else {
                foreach ($paths as $index => $xpath) {
                    $paths[$index] .= '//' . $pathSegment;
                }
            }
        }

        if (1 == count($paths)) {
            return $paths[0];
        }
        return implode(' | ', $paths);
    }

    /**
     * Tokenize CSS expressions to XPath
     *
     * Handles, in this order: child combinator, IDs, attribute equality
     * (attr="v"), attribute word match (attr~="v"), attribute substring match
     * (attr*="v") and classes.
     *
     * The attribute rewrites use static callback methods instead of
     * create_function(), which is deprecated as of PHP 7.2 and removed in PHP 8.0.
     *
     * @param  string $expression Single whitespace-free selector segment
     * @return string
     */
    protected static function _tokenize($expression)
    {
        // Child selectors
        $expression = str_replace('>', '/', $expression);

        // IDs
        $expression = preg_replace('|#([a-z][a-z0-9_-]*)|i', '[@id=\'$1\']', $expression);
        // An ID predicate that is not attached to an element name applies to any element
        $expression = preg_replace('|(?<![a-z0-9_-])(\[@id=)|i', '*$1', $expression);

        // arbitrary attribute strict equality
        $expression = preg_replace_callback(
            '|([a-z]+)\[([a-z0-9_-]+)=[\'"]([^\'"]+)[\'"]\]|i',
            array(__CLASS__, '_createEqualityExpression'),
            $expression
        );

        // arbitrary attribute contains full word
        $expression = preg_replace_callback(
            '|([a-z]+)\[([a-z0-9_-]+)~=[\'"]([^\'"]+)[\'"]\]|i',
            array(__CLASS__, '_createWordContainsExpression'),
            $expression
        );

        // arbitrary attribute contains specified content
        $expression = preg_replace_callback(
            '|([a-z]+)\[([a-z0-9_-]+)\*=[\'"]([^\'"]+)[\'"]\]|i',
            array(__CLASS__, '_createContainsExpression'),
            $expression
        );

        // Classes
        $expression = preg_replace('|\.([a-z][a-z0-9_-]*)|i', "[contains(@class, ' \$1 ')]", $expression);

        return $expression;
    }

    /**
     * Callback: build an attribute equality predicate
     *
     * @param  array $matches 1 => element name, 2 => attribute name, 3 => value
     * @return string
     */
    protected static function _createEqualityExpression($matches)
    {
        return $matches[1] . "[@" . strtolower($matches[2]) . "='" . $matches[3] . "']";
    }

    /**
     * Callback: build an attribute "contains full word" predicate
     *
     * @param  array $matches 1 => element name, 2 => attribute name, 3 => word
     * @return string
     */
    protected static function _createWordContainsExpression($matches)
    {
        return $matches[1] . "[contains(@" . strtolower($matches[2]) . ", ' " . $matches[3] . " ')]";
    }

    /**
     * Callback: build an attribute "contains substring" predicate
     *
     * @param  array $matches 1 => element name, 2 => attribute name, 3 => substring
     * @return string
     */
    protected static function _createContainsExpression($matches)
    {
        return $matches[1] . "[contains(@" . strtolower($matches[2]) . ", '" . $matches[3] . "')]";
    }
}
PK <�H[% �Z� �
Result.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Dom
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Results for DOM XPath query
*
* @package Zend_Dom
* @subpackage Query
* @uses Iterator
* @copyright Copyright (C) 2008 - Present, Zend Technologies, Inc.
* @license New BSD {@link http://framework.zend.com/license/new-bsd}
* @version $Id: Result.php 12507 2008-11-10 16:29:09Z matthew $
*/
class Zend_Dom_Query_Result implements Iterator,Countable
{
    /**
     * Number of results
     * @var int
     */
    protected $_count;

    /**
     * CSS Selector query
     * @var string
     */
    protected $_cssQuery;

    /**
     * @var DOMDocument
     */
    protected $_document;

    /**
     * @var DOMNodeList
     */
    protected $_nodeList;

    /**
     * Current iterator position
     * @var int
     */
    protected $_position = 0;

    /**
     * @var DOMXPath
     */
    protected $_xpath;

    /**
     * XPath query
     * @var string
     */
    protected $_xpathQuery;

    /**
     * Constructor
     *
     * @param  string $cssQuery
     * @param  string|array $xpathQuery
     * @param  DOMDocument $document
     * @param  DOMNodeList $nodeList
     * @return void
     */
    public function __construct($cssQuery, $xpathQuery, DOMDocument $document, DOMNodeList $nodeList)
    {
        $this->_cssQuery   = $cssQuery;
        $this->_xpathQuery = $xpathQuery;
        $this->_document   = $document;
        $this->_nodeList   = $nodeList;
    }

    /**
     * Retrieve CSS Query
     *
     * @return string
     */
    public function getCssQuery()
    {
        return $this->_cssQuery;
    }

    /**
     * Retrieve XPath query
     *
     * @return string|array Value passed to the constructor
     */
    public function getXpathQuery()
    {
        return $this->_xpathQuery;
    }

    /**
     * Retrieve DOMDocument
     *
     * @return DOMDocument
     */
    public function getDocument()
    {
        return $this->_document;
    }

    /**
     * Iterator: rewind to first element
     *
     * The first node is returned for backward compatibility, which is why the
     * method cannot declare the void return type of Iterator::rewind().
     *
     * @return DOMNode|null First node, or null when the list is empty
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->_position = 0;
        return $this->_nodeList->item(0);
    }

    /**
     * Iterator: is current position valid?
     *
     * Plain bounds check against the node list length; no temporary array is
     * built, so a full iteration stays linear in the number of nodes.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return $this->_position >= 0
            && $this->_position < $this->_nodeList->length;
    }

    /**
     * Iterator: return current element
     *
     * @return DOMNode|null
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->_nodeList->item($this->_position);
    }

    /**
     * Iterator: return key of current element
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->_position;
    }

    /**
     * Iterator: move to next element
     *
     * The node at the new position is returned for backward compatibility.
     *
     * @return DOMNode|null Node at the new position, or null past the end
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        ++$this->_position;
        return $this->_nodeList->item($this->_position);
    }

    /**
     * Countable: get count
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function count()
    {
        return $this->_nodeList->length;
    }
}
PK �H[n�@
�K �K
MultiTerm.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query */
require_once 'Zend/Search/Lucene/Search/Query.php';
/** Zend_Search_Lucene_Search_Weight_MultiTerm */
require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
{
    /**
     * Terms to find.
     * Array of Zend_Search_Lucene_Index_Term
     *
     * @var array
     */
    private $_terms = array();

    /**
     * Term signs.
     * If true then term is required.
     * If false then term is prohibited.
     * If null then term is neither prohibited, nor required
     *
     * If array is null then all terms are required
     *
     * @var array
     */
    private $_signs;

    /**
     * Result vector.
     *
     * @var array
     */
    private $_resVector = null;

    /**
     * Terms positions vectors.
     * Array of Arrays:
     * term1Id => (docId => freq, ...)
     * term2Id => (docId => freq, ...)
     *
     * @var array
     */
    private $_termsFreqs = array();

    /**
     * A score factor based on the fraction of all query terms
     * that a document contains.
     * float for conjunction queries
     * array of float for non conjunction queries
     *
     * @var mixed
     */
    private $_coord = null;

    /**
     * Terms weights
     * array of Zend_Search_Lucene_Search_Weight
     *
     * @var array
     */
    private $_weights = array();

    /**
     * Class constructor. Create a new multi-term query object.
     *
     * if $signs array is omitted then all terms are required
     * it differs from addTerm() behavior, but should never be used
     *
     * The signs array is stored only if at least one sign is not strictly
     * true; otherwise the query stays in "all terms required" mode
     * (signs === null).
     *
     * @param array $terms Array of Zend_Search_Lucene_Index_Term objects
     * @param array $signs Array of signs. Sign is boolean|null.
     */
    public function __construct($terms = null, $signs = null)
    {
        if (is_array($terms)) {
            $this->_terms = $terms;
            $this->_signs = null;

            // Check if all terms are required
            if (is_array($signs)) {
                foreach ($signs as $sign ) {
                    if ($sign !== true) {
                        $this->_signs = $signs;
                        break;
                    }
                }
            }
        }
    }

    /**
     * Add a $term (Zend_Search_Lucene_Index_Term) to this query.
     *
     * The sign is specified as:
     * TRUE - term is required
     * FALSE - term is prohibited
     * NULL - term is neither prohibited, nor required
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param boolean|null $sign
     * @return void
     */
    public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null) {
        if ($sign !== true || $this->_signs !== null) { // Skip, if all terms are required
            if ($this->_signs === null) { // Check, If all previous terms are required
                // Leaving "all required" mode: materialize a true sign for every existing term
                $this->_signs = array();
                foreach ($this->_terms as $prevTerm) {
                    $this->_signs[] = true;
                }
            }
            $this->_signs[] = $sign;
        }

        $this->_terms[] = $term;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * Returns an empty query when there are no terms, this query itself when
     * every term has a field, and otherwise a boolean query with one term
     * subquery per term.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        if (count($this->_terms) == 0) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        // Check, that all fields are qualified
        $allQualified = true;
        foreach ($this->_terms as $term) {
            if ($term->field === null) {
                $allQualified = false;
                break;
            }
        }

        if ($allQualified) {
            return $this;
        } else {
            /** transform multiterm query to boolean and apply rewrite() method to subqueries. */
            $query = new Zend_Search_Lucene_Search_Query_Boolean();
            $query->setBoost($this->getBoost());

            foreach ($this->_terms as $termId => $term) {
                $subquery = new Zend_Search_Lucene_Search_Query_Term($term);

                $query->addSubquery($subquery->rewrite($index),
                                    ($this->_signs === null)? true : $this->_signs[$termId]);
            }

            return $query;
        }
    }

    /**
     * Optimize query in the context of specified index
     *
     * Terms missing from the index make the whole query empty when they are
     * required, and are dropped otherwise. A query left with only prohibited
     * terms, or with no terms at all, is empty; a single remaining term
     * becomes a term query.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        $terms = $this->_terms;
        $signs = $this->_signs;

        foreach ($terms as $id => $term) {
            if (!$index->hasTerm($term)) {
                if ($signs === null || $signs[$id] === true) {
                    // Term is required
                    return new Zend_Search_Lucene_Search_Query_Empty();
                } else {
                    // Term is optional or prohibited
                    // Remove it from terms and signs list
                    unset($terms[$id]);
                    unset($signs[$id]);
                }
            }
        }

        // Check if all presented terms are prohibited
        $allProhibited = true;
        if ($signs === null) {
            $allProhibited = false;
        } else {
            foreach ($signs as $sign) {
                if ($sign !== false) {
                    $allProhibited = false;
                    break;
                }
            }
        }
        if ($allProhibited) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        /**
         * @todo make an optimization for repeated terms
         * (they may have different signs)
         */

        if (count($terms) == 1) {
            // It's already checked, that it's not a prohibited term

            // It's one term query with one required or optional element
            $optimizedQuery = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
            $optimizedQuery->setBoost($this->getBoost());

            return $optimizedQuery;
        }

        if (count($terms) == 0) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $signs);
        $optimizedQuery->setBoost($this->getBoost());
        return $optimizedQuery;
    }

    /**
     * Returns query terms
     *
     * @return array
     */
    public function getTerms()
    {
        return $this->_terms;
    }

    /**
     * Return terms signs
     *
     * @return array|null Null means all terms are required
     */
    public function getSigns()
    {
        return $this->_signs;
    }

    /**
     * Set weight for specified term
     *
     * @param integer $num
     * @param Zend_Search_Lucene_Search_Weight_Term $weight
     */
    public function setWeight($num, $weight)
    {
        $this->_weights[$num] = $weight;
    }

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        // NOTE(review): $_weight is not declared in this class; presumably it
        // is declared by the parent query class - confirm.
        $this->_weight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
        return $this->_weight;
    }

    /**
     * Calculate result vector for Conjunction query
     * (like '+something +another')
     *
     * @param Zend_Search_Lucene_Interface $reader
     */
    private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
    {
        $this->_resVector = null;

        if (count($this->_terms) == 0) {
            // Nothing to match. Return right away: the code below flips the
            // docs vector of the last processed term, which does not exist
            // when there are no terms.
            $this->_resVector = array();
            return;
        }

        // Order terms by selectivity
        $docFreqs = array();
        $ids      = array();
        foreach ($this->_terms as $id => $term) {
            $docFreqs[] = $reader->docFreq($term);
            $ids[]      = $id; // Used to keep original order for terms with the same selectivity and omit terms comparison
        }
        array_multisort($docFreqs, SORT_ASC, SORT_NUMERIC,
                        $ids,      SORT_ASC, SORT_NUMERIC,
                        $this->_terms);

        $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
        foreach ($this->_terms as $termId => $term) {
            $termDocs = $reader->termDocs($term, $docsFilter);
        }
        // Treat last retrieved docs vector as a result set
        // (filter collects data for other terms)
        $this->_resVector = array_flip($termDocs);

        foreach ($this->_terms as $termId => $term) {
            $this->_termsFreqs[$termId] = $reader->termFreqs($term, $docsFilter);
        }

        // ksort($this->_resVector, SORT_NUMERIC);
        // Docs are returned ordered. Used algorithms doesn't change elements order.
    }

    /**
     * Calculate result vector for non Conjunction query
     * (like '+something -another')
     *
     * The result is the intersection of all required term vectors (or the
     * union of optional term vectors when no term is required), minus every
     * document matched by a prohibited term.
     *
     * @param Zend_Search_Lucene_Interface $reader
     */
    private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
    {
        $requiredVectors      = array();
        $requiredVectorsSizes = array();
        $requiredVectorsIds   = array(); // is used to prevent arrays comparison

        $optional   = array();
        $prohibited = array();

        foreach ($this->_terms as $termId => $term) {
            $termDocs = array_flip($reader->termDocs($term));

            if ($this->_signs[$termId] === true) {
                // required
                $requiredVectors[]      = $termDocs;
                $requiredVectorsSizes[] = count($termDocs);
                $requiredVectorsIds[]   = $termId;
            } elseif ($this->_signs[$termId] === false) {
                // prohibited
                // array union
                $prohibited += $termDocs;
            } else {
                // neither required, nor prohibited
                // array union
                $optional += $termDocs;
            }

            $this->_termsFreqs[$termId] = $reader->termFreqs($term);
        }

        // sort resvectors in order of subquery cardinality increasing
        array_multisort($requiredVectorsSizes, SORT_ASC, SORT_NUMERIC,
                        $requiredVectorsIds,   SORT_ASC, SORT_NUMERIC,
                        $requiredVectors);

        $required = null;
        foreach ($requiredVectors as $nextResVector) {
            if($required === null) {
                $required = $nextResVector;
            } else {
                //$required = array_intersect_key($required, $nextResVector);

                /**
                 * This code is used as workaround for array_intersect_key() slowness problem.
                 */
                $updatedVector = array();
                foreach ($required as $id => $value) {
                    if (isset($nextResVector[$id])) {
                        $updatedVector[$id] = $value;
                    }
                }
                $required = $updatedVector;
            }

            if (count($required) == 0) {
                // Empty result set, we don't need to check other terms
                break;
            }
        }

        if ($required !== null) {
            $this->_resVector = $required;
        } else {
            $this->_resVector = $optional;
        }

        if (count($prohibited) != 0) {
            // $this->_resVector = array_diff_key($this->_resVector, $prohibited);

            /**
             * This code is used as workaround for array_diff_key() slowness problem.
             * It walks whichever of the two vectors is smaller.
             */
            if (count($this->_resVector) < count($prohibited)) {
                $updatedVector = $this->_resVector;
                foreach ($this->_resVector as $id => $value) {
                    if (isset($prohibited[$id])) {
                        unset($updatedVector[$id]);
                    }
                }
                $this->_resVector = $updatedVector;
            } else {
                $updatedVector = $this->_resVector;
                foreach ($prohibited as $id => $value) {
                    unset($updatedVector[$id]);
                }
                $this->_resVector = $updatedVector;
            }
        }

        ksort($this->_resVector, SORT_NUMERIC);
    }

    /**
     * Score calculator for conjunction queries (all terms are required)
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
    {
        if ($this->_coord === null) {
            // Every term matches in a conjunction, so overlap equals the term count
            $this->_coord = $reader->getSimilarity()->coord(count($this->_terms),
                                                            count($this->_terms) );
        }

        $score = 0.0;

        foreach ($this->_terms as $termId => $term) {
            /**
             * We don't need to check that term freq is not 0
             * Score calculation is performed only for matched docs
             */
            $score += $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) *
                      $this->_weights[$termId]->getValue() *
                      $reader->norm($docId, $term->field);
        }

        return $score * $this->_coord * $this->getBoost();
    }

    /**
     * Score calculator for non conjunction queries (not all terms are required)
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _nonConjunctionScore($docId, $reader)
    {
        if ($this->_coord === null) {
            // Pre-compute the coord factor for every possible number of matched terms
            $this->_coord = array();

            $maxCoord = 0;
            foreach ($this->_signs as $sign) {
                if ($sign !== false /* not prohibited */) {
                    $maxCoord++;
                }
            }

            for ($count = 0; $count <= $maxCoord; $count++) {
                $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
            }
        }

        $score = 0.0;
        $matchedTerms = 0;
        foreach ($this->_terms as $termId=>$term) {
            // Check if term is not prohibited and is matched in this document
            if ($this->_signs[$termId] !== false &&        // not prohibited
                isset($this->_termsFreqs[$termId][$docId]) // matched
               ) {
                $matchedTerms++;

                /**
                 * We don't need to check that term freq is not 0
                 * Score calculation is performed only for matched docs
                 */
                $score +=
                      $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]) *
                      $this->_weights[$termId]->getValue() *
                      $reader->norm($docId, $term->field);
            }
        }

        return $score * $this->_coord[$matchedTerms] * $this->getBoost();
    }

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * The $docsFilter argument is accepted but not read by this implementation.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     */
    public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
    {
        if ($this->_signs === null) {
            $this->_calculateConjunctionResult($reader);
        } else {
            $this->_calculateNonConjunctionResult($reader);
        }

        // Initialize weight if it's not done yet
        $this->_initWeight($reader);
    }

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    public function matchedDocs()
    {
        return $this->_resVector;
    }

    /**
     * Score specified document
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        if (isset($this->_resVector[$docId])) {
            if ($this->_signs === null) {
                return $this->_conjunctionScore($docId, $reader);
            } else {
                return $this->_nonConjunctionScore($docId, $reader);
            }
        } else {
            return 0;
        }
    }

    /**
     * Return query terms
     *
     * Prohibited terms are left out.
     *
     * @return array
     */
    public function getQueryTerms()
    {
        if ($this->_signs === null) {
            return $this->_terms;
        }

        $terms = array();

        foreach ($this->_signs as $id => $sign) {
            if ($sign !== false) {
                $terms[] = $this->_terms[$id];
            }
        }

        return $terms;
    }

    /**
     * Highlight query terms
     *
     * Prohibited terms are not highlighted.
     *
     * @param Zend_Search_Lucene_Document_Html $doc
     * @param integer &$colorIndex
     */
    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
    {
        $words = array();

        if ($this->_signs === null) {
            foreach ($this->_terms as $term) {
                $words[] = $term->text;
            }
        } else {
            foreach ($this->_signs as $id => $sign) {
                if ($sign !== false) {
                    $words[] = $this->_terms[$id]->text;
                }
            }
        }

        $doc->highlight($words, $this->_getHighlightColor($colorIndex));
    }

    /**
     * Print a query
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping

        $query = '';

        // Separator handling is position based rather than key based: term
        // keys do not have to start at 0 (optimize() may unset entries).
        $first = true;

        foreach ($this->_terms as $id => $term) {
            if (!$first) {
                $query .= ' ';
            }
            $first = false;

            if ($this->_signs === null || $this->_signs[$id] === true) {
                $query .= '+';
            } else if ($this->_signs[$id] === false) {
                $query .= '-';
            }

            if ($term->field !== null) {
                $query .= $term->field . ':';
            }
            $query .= $term->text;
        }

        if ($this->getBoost() != 1) {
            $query = '(' . $query . ')^' . $this->getBoost();
        }

        return $query;
    }
}
PK �H[�ͯ9�C �C
Phrase.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Zend_Search_Lucene_Search_Query
*/
require_once 'Zend/Search/Lucene/Search/Query.php';
/**
* Zend_Search_Lucene_Search_Weight_Phrase
*/
require_once 'Zend/Search/Lucene/Search/Weight/Phrase.php';
/**
* A Query that matches documents containing a particular sequence of terms.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Query
{
/**
* Terms to find.
* Array of Zend_Search_Lucene_Index_Term objects.
*
* @var array
*/
private $_terms;
/**
* Term positions (relative positions of terms within the phrase).
* Array of integers
*
* @var array
*/
private $_offsets;
/**
* Sets the number of other words permitted between words in query phrase.
* If zero, then this is an exact phrase search. For larger values this works
* like a WITHIN or NEAR operator.
*
* The slop is in fact an edit-distance, where the units correspond to
* moves of terms in the query phrase out of position. For example, to switch
* the order of two words requires two moves (the first move places the words
* atop one another), so to permit re-orderings of phrases, the slop must be
* at least two.
* More exact matches are scored higher than sloppier matches, thus search
* results are sorted by exactness.
*
* The slop is zero by default, requiring exact matches.
*
* @var integer
*/
private $_slop;
/**
* Result vector.
*
* @var array
*/
private $_resVector = null;
/**
* Terms positions vectors.
* Array of Arrays:
* term1Id => (docId => array( pos1, pos2, ... ), ...)
* term2Id => (docId => array( pos1, pos2, ... ), ...)
*
* @var array
*/
private $_termsPositions = array();
/**
* Class constructor. Create a new phrase query.
*
* @param array $terms Terms to search. Array of strings.
* @param array $offsets Relative term positions. Array of integers.
* @param string $field Field to search.
* @throws Zend_Search_Lucene_Exception
*/
public function __construct($terms = null, $offsets = null, $field = null)
{
    // A slop of zero means exact phrase matching.
    $this->_slop = 0;

    // Anything other than an array or null is rejected up front.
    if ($terms !== null && !is_array($terms)) {
        throw new Zend_Search_Lucene_Exception('terms argument must be array of strings or null');
    }

    // Wrap every term text into an index term, keeping the caller's keys.
    $this->_terms = array();
    if (is_array($terms)) {
        foreach ($terms as $termId => $termText) {
            if ($field !== null) {
                $this->_terms[$termId] = new Zend_Search_Lucene_Index_Term($termText, $field);
            } else {
                $this->_terms[$termId] = new Zend_Search_Lucene_Index_Term($termText);
            }
        }
    }

    if ($offsets !== null && !is_array($offsets)) {
        throw new Zend_Search_Lucene_Exception('offsets argument must be array of strings or null');
    }

    if (is_array($offsets)) {
        // Explicit offsets must pair up one-to-one with the terms.
        if (count($this->_terms) != count($offsets)) {
            throw new Zend_Search_Lucene_Exception('terms and offsets arguments must have the same size.');
        }
        $this->_offsets = $offsets;
    } else {
        // No offsets given: number the terms 0, 1, 2, ... in iteration order.
        $defaultOffsets = array();
        $position       = 0;
        foreach ($this->_terms as $termId => $term) {
            $defaultOffsets[$termId] = $position;
            $position++;
        }
        $this->_offsets = $defaultOffsets;
    }
}
/**
* Set slop
*
* @param integer $slop
*/
public function setSlop($slop)
{
    // Stored as given; score() compares it to 0 to choose exact or sloppy matching.
    $this->_slop = $slop;
}
/**
* Get slop
*
* @return integer
*/
public function getSlop()
{
    // 0 unless changed through setSlop().
    return $this->_slop;
}
/**
* Adds a term to the end of the query phrase.
* The relative position of the term is specified explicitly or the one immediately
* after the last term added.
*
* @param Zend_Search_Lucene_Index_Term $term
* @param integer $position
*/
public function addTerm(Zend_Search_Lucene_Index_Term $term, $position = null) {
    // Only the most recently added term is compared: all terms of a phrase
    // have to belong to the same field.
    if (count($this->_terms) != 0) {
        $previousTerm = end($this->_terms);
        if ($previousTerm->field != $term->field) {
            throw new Zend_Search_Lucene_Exception('All phrase terms must be in the same field: ' .
                                                   $term->field . ':' . $term->text);
        }
    }

    $this->_terms[] = $term;

    // Explicit position wins; otherwise continue right after the last
    // offset, or start at 0 for the very first term.
    if ($position !== null) {
        $nextOffset = $position;
    } else if (count($this->_offsets) != 0) {
        $nextOffset = end($this->_offsets) + 1;
    } else {
        $nextOffset = 0;
    }
    $this->_offsets[] = $nextOffset;
}
/**
* Re-write query into primitive queries in the context of specified index
*
* @param Zend_Search_Lucene_Interface $index
* @return Zend_Search_Lucene_Search_Query
*/
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // The constructor keeps the caller's array keys, so the first term is not
    // guaranteed to live at index 0; reset() returns it whatever its key is.
    $firstTerm = reset($this->_terms);
    if ($firstTerm->field !== null) {
        // Already qualified with a field: nothing to rewrite.
        return $this;
    }

    // Unqualified phrase: build one phrase subquery per field name reported
    // by the index and combine them in a boolean query.
    $query = new Zend_Search_Lucene_Search_Query_Boolean();
    $query->setBoost($this->getBoost());

    foreach ($index->getFieldNames(true) as $fieldName) {
        $subquery = new Zend_Search_Lucene_Search_Query_Phrase();
        $subquery->setSlop($this->getSlop());

        foreach ($this->_terms as $termId => $term) {
            $qualifiedTerm = new Zend_Search_Lucene_Index_Term($term->text, $fieldName);

            $subquery->addTerm($qualifiedTerm, $this->_offsets[$termId]);
        }

        $query->addSubquery($subquery);
    }

    return $query;
}
/**
* Optimize query in the context of specified index
*
* @param Zend_Search_Lucene_Interface $index
* @return Zend_Search_Lucene_Search_Query
*/
public function optimize(Zend_Search_Lucene_Interface $index)
{
    $termCount = count($this->_terms);

    // A phrase without terms cannot match anything.
    if ($termCount == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // One term missing from the index is enough to make the phrase unmatchable.
    foreach ($this->_terms as $phraseTerm) {
        if (!$index->hasTerm($phraseTerm)) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }
    }

    // A single-term phrase is just a term query carrying the same boost.
    if ($termCount == 1) {
        $termQuery = new Zend_Search_Lucene_Search_Query_Term(reset($this->_terms));
        $termQuery->setBoost($this->getBoost());

        return $termQuery;
    }

    return $this;
}
/**
* Returns query terms
*
* @return array
*/
public function getTerms()
{
    // Array of Zend_Search_Lucene_Index_Term objects, keyed by term id.
    return $this->_terms;
}
/**
* Set weight for specified term
*
* @param integer $num
* @param Zend_Search_Lucene_Search_Weight_Term $weight
*/
public function setWeight($num, $weight)
{
    // NOTE(review): $_weights is not among the properties declared in the
    // visible part of this class - confirm where it is declared.
    $this->_weights[$num] = $weight;
}
/**
* Constructs an appropriate Weight implementation for this query.
*
* @param Zend_Search_Lucene_Interface $reader
* @return Zend_Search_Lucene_Search_Weight
*/
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
    // Keep the weight on the query (score() reads it) and hand it back.
    $phraseWeight  = new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader);
    $this->_weight = $phraseWeight;

    return $phraseWeight;
}
/**
* Score calculator for exact phrase queries (terms sequence is fixed)
*
* @param integer $docId
* @return float
*/
public function _exactPhraseFreq($docId)
{
    // Pick the term with the fewest positions in this document; its
    // positions are the only candidate anchors that need checking.
    $anchorTermId = null;
    foreach ($this->_terms as $termId => $term) {
        if ($anchorTermId === null ||
            count($this->_termsPositions[$termId][$docId]) <
            count($this->_termsPositions[$anchorTermId][$docId]) ) {
            $anchorTermId = $termId;
        }
    }

    $freq = 0;

    foreach ($this->_termsPositions[$anchorTermId][$docId] as $anchorPos) {
        // Assume a match until one of the other terms is missing at the
        // position implied by its offset relative to the anchor term.
        $phraseFound = true;

        foreach ($this->_terms as $termId => $term) {
            if ($termId == $anchorTermId) {
                continue;
            }

            $expectedPosition = $anchorPos +
                                ($this->_offsets[$termId] -
                                 $this->_offsets[$anchorTermId]);

            if (!in_array($expectedPosition, $this->_termsPositions[$termId][$docId])) {
                $phraseFound = false;
                break;
            }
        }

        if ($phraseFound) {
            $freq++;
        }
    }

    return $freq;
}
/**
* Score calculator for sloppy phrase queries (term positions may deviate from the phrase offsets within the slop)
*
* @param integer $docId
* @param Zend_Search_Lucene_Interface $reader
* @return float
*/
public function _sloppyPhraseFreq($docId, Zend_Search_Lucene_Interface $reader)
{
// Two stages: (1) build candidate phrases, each a map termId => position in
// the document; (2) measure how far each candidate is from the ideal layout
// given by $this->_offsets and add up sloppyFreq() of those within the slop.
$freq = 0;
$phraseQueue = array();
$phraseQueue[0] = array(); // empty phrase
$lastTerm = null;
// Walk through the terms to create phrases.
foreach ($this->_terms as $termId => $term) {
// Size is captured before this term adds candidates, so only phrases that
// existed before this term are extended or cloned below.
$queueSize = count($phraseQueue);
$firstPass = true;
// Walk through the term positions.
// Each term position produces a set of phrases.
foreach ($this->_termsPositions[$termId][$docId] as $termPosition ) {
if ($firstPass) {
// First position of this term: written into every existing phrase in place
// (no slop check is applied here).
for ($count = 0; $count < $queueSize; $count++) {
$phraseQueue[$count][$termId] = $termPosition;
}
} else {
// Further positions: clone each pre-existing phrase with this position,
// unless the gap to the previous term's position differs from the expected
// offset difference by more than the slop.
for ($count = 0; $count < $queueSize; $count++) {
if ($lastTerm !== null &&
abs( $termPosition - $phraseQueue[$count][$lastTerm] -
($this->_offsets[$termId] - $this->_offsets[$lastTerm])) > $this->_slop) {
continue;
}
$newPhraseId = count($phraseQueue);
$phraseQueue[$newPhraseId] = $phraseQueue[$count];
$phraseQueue[$newPhraseId][$termId] = $termPosition;
}
}
$firstPass = false;
}
$lastTerm = $termId;
}
foreach ($phraseQueue as $phrasePos) {
$minDistance = null;
// Try every phrase start within +/- slop of the start implied by the first
// term, and keep the smallest total displacement.
for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) {
$distance = 0;
$start = reset($phrasePos) - reset($this->_offsets) + $shift;
foreach ($this->_terms as $termId => $term) {
$distance += abs($phrasePos[$termId] - $this->_offsets[$termId] - $start);
if($distance > $this->_slop) {
// Already over the slop for this shift; the partial sum is kept and is
// rejected by the final comparison unless another shift does better.
break;
}
}
if ($minDistance === null || $distance < $minDistance) {
$minDistance = $distance;
}
}
if ($minDistance <= $this->_slop) {
$freq += $reader->getSimilarity()->sloppyFreq($minDistance);
}
}
return $freq;
}
/**
* Execute query in context of index reader
* It also initializes necessary internal structures
*
* @param Zend_Search_Lucene_Interface $reader
* @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
*/
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    // null marks "no term processed yet"; a phrase without terms matches nothing.
    $this->_resVector = null;
    if (count($this->_terms) == 0) {
        $this->_resVector = array();
    }

    $docVectors  = array();
    $vectorSizes = array();
    $vectorIds   = array(); // tie-breaker so the doc vectors themselves are never compared

    // Collect, per term, the matching documents (as array keys) and the
    // term positions used later by the phrase frequency calculators.
    foreach ($this->_terms as $termId => $term) {
        $termDocs = array_flip($reader->termDocs($term));

        $docVectors[]  = $termDocs;
        $vectorSizes[] = count($termDocs);
        $vectorIds[]   = $termId;

        $this->_termsPositions[$termId] = $reader->termPositions($term);
    }

    // Process the smallest vectors first
    array_multisort($vectorSizes, SORT_ASC, SORT_NUMERIC,
                    $vectorIds,   SORT_ASC, SORT_NUMERIC,
                    $docVectors);

    foreach ($docVectors as $candidateDocs) {
        if ($this->_resVector === null) {
            $this->_resVector = $candidateDocs;
        } else {
            // Manual key intersection, kept as a workaround for
            // array_intersect_key() slowness.
            $intersection = array();
            foreach ($this->_resVector as $id => $value) {
                if (isset($candidateDocs[$id])) {
                    $intersection[$id] = $value;
                }
            }
            $this->_resVector = $intersection;
        }

        if (count($this->_resVector) == 0) {
            // Nothing left to intersect with
            break;
        }
    }

    // No ksort() needed: docs are returned ordered and the intersection
    // above does not change element order.

    // Initialize weight if it's not done yet
    $this->_initWeight($reader);
}
/**
* Get document ids likely matching the query
*
* Returns the result vector as it was left by execute().
* It's an array with document ids as keys (performance considerations).
* The ids are only candidates: score() decides whether the phrase really
* occurs in a document.
*
* @return array
*/
public function matchedDocs()
{
return $this->_resVector;
}
/**
 * Score specified document
 *
 * Documents which are not in the result vector, or in which the phrase
 * frequency turns out to be zero, get a score of 0.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    if (!isset($this->_resVector[$docId])) {
        return 0;
    }

    // Exact matching is used for a zero slop, sloppy matching otherwise
    $freq = ($this->_slop == 0)
          ? $this->_exactPhraseFreq($docId)
          : $this->_sloppyPhraseFreq($docId, $reader);

    if ($freq == 0) {
        // Included in result, but calculated freq is zero
        return 0;
    }

    $tf     = $reader->getSimilarity()->tf($freq);
    $weight = $this->_weight->getValue();
    $norm   = $reader->norm($docId, reset($this->_terms)->field);

    return $tf * $weight * $norm * $this->getBoost();
}
/**
* Return query terms
*
* Returns the internal terms list of the phrase as is (no copy is made
* explicitly and no filtering is applied).
*
* @return array
*/
public function getQueryTerms()
{
return $this->_terms;
}
/**
 * Highlight query terms
 *
 * Collects the text of every phrase term and asks the document to
 * highlight these words with one color.
 *
 * @param Zend_Search_Lucene_Document_Html $doc
 * @param integer &$colorIndex  Passed by reference to _getHighlightColor()
 */
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
{
    $termTexts = array();

    foreach ($this->_terms as $phraseTerm) {
        $termTexts[] = $phraseTerm->text;
    }

    $color = $this->_getHighlightColor($colorIndex);

    $doc->highlight($termTexts, $color);
}
/**
 * Print a query
 *
 * Produces `field:"term1 term2"~slop`. The field prefix is taken from the
 * term stored under key 0 and the slop suffix is added only for a non-zero slop.
 *
 * @return string
 */
public function __toString()
{
    // It's used only for query visualisation, so we don't care about characters escaping
    $fieldPrefix = '';
    if (isset($this->_terms[0]) && $this->_terms[0]->field !== null) {
        $fieldPrefix = $this->_terms[0]->field . ':';
    }

    $phrase = '';
    foreach ($this->_terms as $id => $term) {
        // every term except the one with key 0 is preceded by a space
        $phrase .= (($id != 0) ? ' ' : '') . $term->text;
    }

    $slopSuffix = ($this->_slop != 0) ? '~' . $this->_slop : '';

    return $fieldPrefix . '"' . $phrase . '"' . $slopSuffix;
}
}
PK �H[���DN8 N8 Fuzzy.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query */
require_once 'Zend/Search/Lucene/Search/Query.php';
/** Zend_Search_Lucene_Search_Query_MultiTerm */
require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Query
{
/** Default minimum similarity */
const DEFAULT_MIN_SIMILARITY = 0.5;
/**
* Maximum number of matched terms.
* Apache Lucene defines this limitation as boolean query maximum number of clauses:
* org.apache.lucene.search.BooleanQuery.getMaxClauseCount()
*/
const MAX_CLAUSE_COUNT = 1024;
/**
* Array of precalculated max distances
*
* keys are integers representing a word size (as passed to _calculateMaxDistance()),
* values are the maximum edit distances calculated for that size
*
* @var array
*/
private $_maxDistances = array();
/**
* Base searching term.
*
* @var Zend_Search_Lucene_Index_Term
*/
private $_term;
/**
* A value between 0 and 1 to set the required similarity
* between the query term and the matching terms. For example, for a
* _minimumSimilarity of 0.5 a term of the same length
* as the query term is considered similar to the query term if the edit distance
* between both terms is less than length(term)*0.5
*
* @var float
*/
private $_minimumSimilarity;
/**
* The length of common (non-fuzzy) prefix
*
* @var integer
*/
private $_prefixLength;
/**
* Matched terms.
*
* Matched terms list.
* It's filled during the search (rewrite operation) and may be used for search result
* post-processing
*
* Array of Zend_Search_Lucene_Index_Term objects; null until rewrite() is called
*
* @var array
*/
private $_matches = null;
/**
* Matched terms scores
*
* @var array
*/
private $_scores = null;
/**
* Array of the term keys.
* Used to sort terms in alphabetical order if terms have the same scores
*
* @var array
*/
private $_termKeys = null;
/**
* Zend_Search_Lucene_Search_Query_Fuzzy constructor.
*
* @param Zend_Search_Lucene_Index_Term $term  Base term to search similar terms for
* @param float   $minimumSimilarity  Required similarity, must satisfy 0 <= value < 1
* @param integer $prefixLength       Length of the common (non-fuzzy) prefix, must not be negative
* @throws Zend_Search_Lucene_Exception  If $minimumSimilarity or $prefixLength is out of range
*/
public function __construct(Zend_Search_Lucene_Index_Term $term, $minimumSimilarity = self::DEFAULT_MIN_SIMILARITY, $prefixLength = 0)
{
if ($minimumSimilarity < 0) {
throw new Zend_Search_Lucene_Exception('minimumSimilarity cannot be less than 0');
}
if ($minimumSimilarity >= 1) {
throw new Zend_Search_Lucene_Exception('minimumSimilarity cannot be greater than or equal to 1');
}
if ($prefixLength < 0) {
throw new Zend_Search_Lucene_Exception('prefixLength cannot be less than 0');
}
$this->_term = $term;
$this->_minimumSimilarity = $minimumSimilarity;
$this->_prefixLength = $prefixLength;
}
/**
 * Calculate maximum distance for specified word length
 *
 * The result is also stored in the $_maxDistances cache under the $length key.
 *
 * @param integer $prefixLength
 * @param integer $termLength
 * @param integer $length
 * @return integer
 */
private function _calculateMaxDistance($prefixLength, $termLength, $length)
{
    $comparedLength = min($termLength, $length) + $prefixLength;
    $maxDistance    = (int) ((1 - $this->_minimumSimilarity)*$comparedLength);

    $this->_maxDistances[$length] = $maxDistance;

    return $maxDistance;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * Scans the terms dictionary of every searched field, starting at the common
 * (non-fuzzy) prefix, and collects all terms whose similarity to the base term
 * is greater than the minimum similarity. The result is:
 *  - an Empty query if nothing matched,
 *  - a Term query if exactly one term matched,
 *  - a Boolean query of boosted Term subqueries otherwise (best scores first,
 *    at most MAX_CLAUSE_COUNT clauses).
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    $this->_matches  = array();
    $this->_scores   = array();
    $this->_termKeys = array();

    if ($this->_term->field === null) {
        // Search through all fields
        $fields = $index->getFieldNames(true /* indexed fields list */);
    } else {
        $fields = array($this->_term->field);
    }

    $prefix = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
    if ($prefix != '') {
        $prefixByteLength = strlen($prefix);
        $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
    } else {
        // No common prefix: the whole terms dictionary of a field is scanned
        $prefix           = '';
        $prefixByteLength = 0;
        $prefixUtf8Length = 0;
    }

    // The rest is handled in bytes since levenshtein() is not UTF-8 compatible.
    // The cast covers substr() returning false for an empty rest (PHP < 8).
    $termRest    = (string) substr($this->_term->text, $prefixByteLength);
    $scaleFactor = 1/(1 - $this->_minimumSimilarity);

    foreach ($fields as $field) {
        $index->resetTermsStream();
        $index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));

        // strncmp() gives a strict, binary safe prefix test (a loose '==' would treat
        // numeric strings like '100' and '1e2' as equal); with an empty prefix it
        // always succeeds.
        while (($currentTerm = $index->currentTerm()) !== null &&
               $currentTerm->field == $field &&
               strncmp($currentTerm->text, $prefix, $prefixByteLength) === 0) {
            $target     = (string) substr($currentTerm->text, $prefixByteLength);
            $similarity = $this->_calculateSimilarity($termRest, $target, $prefixUtf8Length);

            if ($similarity > $this->_minimumSimilarity) {
                $this->_matches[]  = $currentTerm;
                $this->_termKeys[] = $currentTerm->key();
                $this->_scores[]   = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
            }

            $index->nextTerm();
        }

        $index->closeTermsStream();
    }

    if (count($this->_matches) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    } else if (count($this->_matches) == 1) {
        return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
    }

    $rewrittenQuery = new Zend_Search_Lucene_Search_Query_Boolean();

    // Best scores first, terms with equal scores in alphabetical order of their keys
    array_multisort($this->_scores,   SORT_DESC, SORT_NUMERIC,
                    $this->_termKeys, SORT_ASC,  SORT_STRING,
                    $this->_matches);

    $termCount = 0;
    foreach ($this->_matches as $id => $matchedTerm) {
        $subquery = new Zend_Search_Lucene_Search_Query_Term($matchedTerm);
        $subquery->setBoost($this->_scores[$id]);

        $rewrittenQuery->addSubquery($subquery);

        $termCount++;
        if ($termCount >= self::MAX_CLAUSE_COUNT) {
            break;
        }
    }

    return $rewrittenQuery;
}

/**
 * Calculate similarity between the non-prefix parts of the base term and of an index term
 *
 * @param string  $termRest          Base term text without the common prefix
 * @param string  $target            Index term text without the common prefix
 * @param integer $prefixUtf8Length  Length of the common prefix in characters (0 if there is no prefix)
 * @return float|integer  Values which are not greater than the minimum similarity mean "no match"
 */
private function _calculateSimilarity($termRest, $target, $prefixUtf8Length)
{
    $termRestLength = strlen($termRest);
    $targetLength   = strlen($target);

    if ($termRestLength == 0) {
        // We don't have anything to compare. That means if we just add
        // the letters of the current term we get the new word.
        // (Without a prefix there is nothing to relate the difference to: no match.)
        return ($prefixUtf8Length == 0)? 0 : 1 - $targetLength/$prefixUtf8Length;
    }

    if ($targetLength == 0) {
        return ($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length;
    }

    $maxDistance = isset($this->_maxDistances[$targetLength])?
                       $this->_maxDistances[$targetLength] :
                       $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, $targetLength);

    if ($maxDistance < abs($termRestLength - $targetLength)) {
        // Just adding the characters of term to target or vice-versa results in too many edits.
        // For example "pre" length is 3 and "prefixes" length is 8, so the edit distance
        // cannot be less than 5 (abs(3-8)). If the maximum edit distance is 4, the word
        // can be discarded without looking at it.
        return 0;
    }

    $distance = levenshtein($termRest, $target);
    if ($distance < 0) {
        // levenshtein() reports -1 for too long arguments (PHP < 8);
        // it must not be turned into a similarity greater than 1
        return 0;
    }

    // Both lengths are non-zero here, so the divisor can't be zero
    return 1 - $distance/($prefixUtf8Length + min($termRestLength, $targetLength));
}
/**
 * Optimize query in the context of specified index
 *
 * Not supported: a fuzzy query has to be rewritten into primitive queries first.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Exception  Always
 */
public function optimize(Zend_Search_Lucene_Interface $index)
{
    throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
 * Return query terms
 *
 * The terms are the ones matched by rewrite(), so they are not available
 * before rewrite() has been called.
 *
 * @return array
 * @throws Zend_Search_Lucene_Exception  If no matches have been collected yet
 */
public function getQueryTerms()
{
    if ($this->_matches !== null) {
        return $this->_matches;
    }

    throw new Zend_Search_Lucene_Exception('Search has to be performed first to get matched terms');
}
/**
 * Constructs an appropriate Weight implementation for this query.
 *
 * Not supported: a fuzzy query has to be rewritten into primitive queries first.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @return Zend_Search_Lucene_Search_Weight
 * @throws Zend_Search_Lucene_Exception  Always
 */
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
    throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
 * Execute query in context of index reader
 *
 * Not supported: a fuzzy query has to be rewritten into primitive queries first.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @throws Zend_Search_Lucene_Exception  Always
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
 * Get document ids likely matching the query
 *
 * Not supported: a fuzzy query has to be rewritten into primitive queries first.
 *
 * @return array
 * @throws Zend_Search_Lucene_Exception  Always
 */
public function matchedDocs()
{
    throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
 * Score specified document
 *
 * Not supported: a fuzzy query has to be rewritten into primitive queries first.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 * @throws Zend_Search_Lucene_Exception  Always
 */
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
 * Highlight query terms
 *
 * Highlights the texts of the terms matched by rewrite(). If rewrite() has
 * not been called yet there are no matched terms and an empty words list
 * is passed to the document.
 *
 * @param Zend_Search_Lucene_Document_Html $doc
 * @param integer &$colorIndex  Passed by reference to _getHighlightColor()
 */
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
{
    $words = array();

    // $_matches is null until rewrite() is performed; iterating null would raise a warning
    if ($this->_matches !== null) {
        foreach ($this->_matches as $term) {
            $words[] = $term->text;
        }
    }

    $doc->highlight($words, $this->_getHighlightColor($colorIndex));
}
/**
 * Print a query
 *
 * Produces `field:text~similarity`. The field prefix is omitted when the term
 * has no field, and the similarity (rounded to 4 digits) is omitted when it
 * equals the default one.
 *
 * @return string
 */
public function __toString()
{
    // It's used only for query visualisation, so we don't care about characters escaping
    $fieldPrefix = '';
    if ($this->_term->field !== null) {
        $fieldPrefix = $this->_term->field . ':';
    }

    $similaritySuffix = '';
    if ($this->_minimumSimilarity != self::DEFAULT_MIN_SIMILARITY) {
        $similaritySuffix = round($this->_minimumSimilarity, 4);
    }

    return $fieldPrefix . $this->_term->text . '~' . $similaritySuffix;
}
}
PK �H[��8��h �h Boolean.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query */
require_once 'Zend/Search/Lucene/Search/Query.php';
/** Zend_Search_Lucene_Search_Weight_Boolean */
require_once 'Zend/Search/Lucene/Search/Weight/Boolean.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_Query
{
/**
* Subqueries
* Array of Zend_Search_Lucene_Search_Query
*
* @var array
*/
private $_subqueries = array();
/**
* Subqueries signs.
* If true then subquery is required.
* If false then subquery is prohibited.
* If null then subquery is neither prohibited, nor required
*
* If the property itself is null (instead of an array) then all subqueries are required.
* Signs are looked up by the key of the corresponding subquery.
*
* @var array|null
*/
private $_signs = array();
/**
* Result vector.
* Null until it is calculated by execute().
*
* @var array
*/
private $_resVector = null;
/**
* A score factor based on the fraction of all query subqueries
* that a document contains.
* float for conjunction queries
* array of float for non conjunction queries
* Calculated lazily on the first scoring call.
*
* @var mixed
*/
private $_coord = null;
/**
 * Class constructor. Create a new Boolean query object.
 *
 * If $signs is omitted, or contains nothing but TRUE values, all subqueries
 * are treated as required (signs are stored as null). This differs from
 * addSubquery() behavior, but should never be used.
 *
 * If $subqueries is not an array, both arguments are ignored.
 *
 * @param array $subqueries Array of Zend_Search_Search_Query objects
 * @param array $signs Array of signs. Sign is boolean|null.
 * @return void
 */
public function __construct($subqueries = null, $signs = null)
{
    if (!is_array($subqueries)) {
        return;
    }

    $this->_subqueries = $subqueries;
    $this->_signs      = null;

    if (!is_array($signs)) {
        return;
    }

    // Signs are kept only if at least one subquery is not required
    foreach ($signs as $sign) {
        if ($sign !== true) {
            $this->_signs = $signs;
            return;
        }
    }
}
/**
 * Add a $subquery (Zend_Search_Lucene_Search_Query) to this query.
 *
 * The sign is specified as:
 *     TRUE  - subquery is required
 *     FALSE - subquery is prohibited
 *     NULL  - subquery is neither prohibited, nor required
 *
 * Signs are stored under the same keys as their subqueries, so both lists
 * stay aligned even if the query was constructed from arrays whose keys are
 * not sequential (e.g. after entries have been unset).
 *
 * @param  Zend_Search_Lucene_Search_Query $subquery
 * @param  boolean|null $sign
 * @return void
 */
public function addSubquery(Zend_Search_Lucene_Search_Query $subquery, $sign=null) {
    $this->_subqueries[] = $subquery;

    if ($sign === true && $this->_signs === null) {
        // All subqueries (including the new one) are required, signs are not tracked
        return;
    }

    // Key which has just been assigned to the new subquery
    end($this->_subqueries);
    $subqueryId = key($this->_subqueries);

    if ($this->_signs === null) {
        // All previous subqueries are required, make it explicit
        $this->_signs = array();
        foreach ($this->_subqueries as $id => $prevSubquery) {
            $this->_signs[$id] = true;
        }
    }

    $this->_signs[$subqueryId] = $sign;
}
/**
 * Re-write queries into primitive queries
 *
 * Builds a new Boolean query with the same boost, containing the rewritten
 * version of every subquery with its original sign (TRUE for all of them
 * if signs are not tracked).
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    $rewritten = new Zend_Search_Lucene_Search_Query_Boolean();
    $rewritten->setBoost($this->getBoost());

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $rewrittenSubquery = $subquery->rewrite($index);

        if ($this->_signs === null) {
            $sign = true;
        } else {
            $sign = $this->_signs[$subqueryId];
        }

        $rewritten->addSubquery($rewrittenSubquery, $sign);
    }

    return $rewritten;
}
/**
* Optimize query in the context of specified index
*
* Steps performed (in this order):
*  1. every subquery is optimized;
*  2. insignificant subqueries are dropped (result is Insignificant if nothing
*     is left or only prohibited subqueries are left);
*  3. empty subqueries are handled (result is Empty if a required subquery is empty,
*     if nothing is left or if only prohibited subqueries are left);
*  4. a single remaining subquery is returned directly (cloned if boost has to be applied);
*  5. Term and decomposable MultiTerm subqueries are flattened into terms lists and
*     regrouped into Term/MultiTerm clauses where boost factors allow it.
*
* NOTE: keys of $subqueries and $signs are preserved while entries are unset,
* so both arrays are always addressed with the same $id.
*
* @param Zend_Search_Lucene_Interface $index
* @return Zend_Search_Lucene_Search_Query
*/
public function optimize(Zend_Search_Lucene_Interface $index)
{
$subqueries = array();
$signs = array();
// Optimize all subqueries
foreach ($this->_subqueries as $id => $subquery) {
$subqueries[] = $subquery->optimize($index);
$signs[] = ($this->_signs === null)? true : $this->_signs[$id];
}
// Remove insignificant subqueries
foreach ($subqueries as $id => $subquery) {
if ($subquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
// Insignificant subquery has to be removed anyway
unset($subqueries[$id]);
unset($signs[$id]);
}
}
if (count($subqueries) == 0) {
// Boolean query has no significant (non-insignificant) subqueries
return new Zend_Search_Lucene_Search_Query_Insignificant();
}
// Check if all non-insignificant subqueries are prohibited
$allProhibited = true;
foreach ($signs as $sign) {
if ($sign !== false) {
$allProhibited = false;
break;
}
}
if ($allProhibited) {
return new Zend_Search_Lucene_Search_Query_Insignificant();
}
// Check for empty subqueries
foreach ($subqueries as $id => $subquery) {
if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
if ($signs[$id] === true) {
// Matching is required, but is actually empty
return new Zend_Search_Lucene_Search_Query_Empty();
} else {
// Matching is optional or prohibited, but is empty
// Remove it from subqueries and signs list
unset($subqueries[$id]);
unset($signs[$id]);
}
}
}
// Check, if reduced subqueries list is empty
if (count($subqueries) == 0) {
return new Zend_Search_Lucene_Search_Query_Empty();
}
// Check if all non-empty subqueries are prohibited
$allProhibited = true;
foreach ($signs as $sign) {
if ($sign !== false) {
$allProhibited = false;
break;
}
}
if ($allProhibited) {
return new Zend_Search_Lucene_Search_Query_Empty();
}
// Check, if reduced subqueries list has only one entry
if (count($subqueries) == 1) {
// It's a query with only one required or optional clause
// (it's already checked, that it's not a prohibited clause)
if ($this->getBoost() == 1) {
return reset($subqueries);
}
// Clone, so the boost of the original subquery object is left untouched
$optimizedQuery = clone reset($subqueries);
$optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());
return $optimizedQuery;
}
// Prepare first candidate for optimized query
$optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
$optimizedQuery->setBoost($this->getBoost());
// Flattened terms, their signs and boost factors (parallel lists)
$terms = array();
$tsigns = array();
$boostFactors = array();
// Try to decompose term and multi-term subqueries
foreach ($subqueries as $id => $subquery) {
if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
$terms[] = $subquery->getTerm();
$tsigns[] = $signs[$id];
$boostFactors[] = $subquery->getBoost();
// remove subquery from a subqueries list
unset($subqueries[$id]);
unset($signs[$id]);
} else if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
$subTerms = $subquery->getTerms();
$subSigns = $subquery->getSigns();
if ($signs[$id] === true) {
// It's a required multi-term subquery.
// Something like '... +(+term1 -term2 term3 ...) ...'
// Multi-term required subquery can be decomposed only if it contains
// required terms and doesn't contain prohibited terms:
// ... +(+term1 term2 ...) ... => ... +term1 term2 ...
//
// Check this
$hasRequired = false;
$hasProhibited = false;
if ($subSigns === null) {
// All subterms are required
$hasRequired = true;
} else {
foreach ($subSigns as $sign) {
if ($sign === true) {
$hasRequired = true;
} else if ($sign === false) {
$hasProhibited = true;
break;
}
}
}
// Continue if subquery has prohibited terms or doesn't have required terms
if ($hasProhibited || !$hasRequired) {
continue;
}
foreach ($subTerms as $termId => $term) {
$terms[] = $term;
$tsigns[] = ($subSigns === null)? true : $subSigns[$termId];
$boostFactors[] = $subquery->getBoost();
}
// remove subquery from a subqueries list
unset($subqueries[$id]);
unset($signs[$id]);
} else { // $signs[$id] === null || $signs[$id] === false
// It's an optional or prohibited multi-term subquery.
// Something like '... (+term1 -term2 term3 ...) ...'
// or
// something like '... -(+term1 -term2 term3 ...) ...'
// Multi-term optional and prohibited subqueries can be decomposed
// only if all terms are optional.
//
// Check if all terms are optional.
$onlyOptional = true;
if ($subSigns === null) {
// All subterms are required
$onlyOptional = false;
} else {
foreach ($subSigns as $sign) {
if ($sign !== null) {
$onlyOptional = false;
break;
}
}
}
// Continue if non-optional terms are presented in this multi-term subquery
if (!$onlyOptional) {
continue;
}
// Every term inherits the sign of the decomposed subquery
foreach ($subTerms as $termId => $term) {
$terms[] = $term;
$tsigns[] = ($signs[$id] === null)? null /* optional */ :
false /* prohibited */;
$boostFactors[] = $subquery->getBoost();
}
// remove subquery from a subqueries list
unset($subqueries[$id]);
unset($signs[$id]);
}
}
}
// Check, if there are no decomposed subqueries
if (count($terms) == 0 ) {
// return prepared candidate
return $optimizedQuery;
}
// Check, if all subqueries have been decomposed and all terms has the same boost factor
if (count($subqueries) == 0 && count(array_unique($boostFactors)) == 1) {
$optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
$optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());
return $optimizedQuery;
}
// This boolean query can't be transformed to Term/MultiTerm query and still contains
// several subqueries
// Separate prohibited terms
$prohibitedTerms = array();
foreach ($terms as $id => $term) {
if ($tsigns[$id] === false) {
$prohibitedTerms[] = $term;
unset($terms[$id]);
unset($tsigns[$id]);
unset($boostFactors[$id]);
}
}
if (count($terms) == 1) {
$clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
$clause->setBoost(reset($boostFactors));
$subqueries[] = $clause;
$signs[] = reset($tsigns);
// Clear terms list
$terms = array();
} else if (count($terms) > 1 && count(array_unique($boostFactors)) == 1) {
$clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
$clause->setBoost(reset($boostFactors));
$subqueries[] = $clause;
// Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
$signs[] = (in_array(true, $tsigns))? true : null;
// Clear terms list
$terms = array();
}
// (if several terms with different boost factors are left, $terms stays non-empty
// and the first candidate is returned at the end)
if (count($prohibitedTerms) == 1) {
// (boost factors are not significant for prohibited clauses)
$subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
$signs[] = false;
// Clear prohibited terms list
$prohibitedTerms = array();
} else if (count($prohibitedTerms) > 1) {
// prepare signs array
$prohibitedSigns = array();
foreach ($prohibitedTerms as $id => $term) {
// all prohibited term are grouped as optional into multi-term query
$prohibitedSigns[$id] = null;
}
// (boost factors are not significant for prohibited clauses)
$subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
// Clause sign is 'prohibited'
$signs[] = false;
// Clear terms list
$prohibitedTerms = array();
}
/** @todo Group terms with the same boost factors together */
// Check, that all terms are processed
// Replace candidate for optimized query
if (count($terms) == 0 && count($prohibitedTerms) == 0) {
$optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
$optimizedQuery->setBoost($this->getBoost());
}
return $optimizedQuery;
}
/**
* Returns subqueries
*
* The internal subqueries array is returned as is, keys are preserved.
*
* @return array
*/
public function getSubqueries()
{
return $this->_subqueries;
}
/**
* Return subqueries signs
*
* Null is returned instead of an array if all subqueries are required.
*
* @return array|null
*/
public function getSigns()
{
return $this->_signs;
}
/**
* Constructs an appropriate Weight implementation for this query.
*
* The created Zend_Search_Lucene_Search_Weight_Boolean object is also stored
* in the $_weight property of the query.
*
* @param Zend_Search_Lucene_Interface $reader
* @return Zend_Search_Lucene_Search_Weight
*/
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
$this->_weight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);
return $this->_weight;
}
/**
 * Calculate result vector for Conjunction query
 * (like '<subquery1> AND <subquery2> AND <subquery3>')
 *
 * The result is the intersection (by document id keys) of matchedDocs()
 * of all subqueries, smallest sets being processed first.
 */
private function _calculateConjunctionResult()
{
    // A query without subqueries has an empty result; otherwise the result is "not built yet"
    $this->_resVector = (count($this->_subqueries) == 0) ? array() : null;

    $docSets   = array();
    $setSizes  = array();
    $setOwners = array(); // extra sort column, prevents arrays comparison while sorting

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $docSet = $subquery->matchedDocs();

        $docSets[]   = $docSet;
        $setSizes[]  = count($docSet);
        $setOwners[] = $subqueryId;
    }

    // Process the smallest document sets first
    array_multisort($setSizes,  SORT_ASC, SORT_NUMERIC,
                    $setOwners, SORT_ASC, SORT_NUMERIC,
                    $docSets);

    foreach ($docSets as $docSet) {
        if ($this->_resVector === null) {
            $this->_resVector = $docSet;
        } else {
            // Hand-written key intersection; used instead of array_intersect_key()
            // as a workaround for its slowness.
            $common = array();
            foreach ($this->_resVector as $docId => $value) {
                if (isset($docSet[$docId])) {
                    $common[$docId] = $value;
                }
            }
            $this->_resVector = $common;
        }

        if (count($this->_resVector) == 0) {
            // Nothing left to intersect with, remaining subqueries can't add documents
            break;
        }
    }

    // No ksort() is needed: used algorithm doesn't change elements order
}
/**
 * Calculate result vector for non Conjunction query
 * (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
 *
 * If there is at least one required subquery, the result is the intersection
 * of the required subqueries results. Otherwise it is the union of the optional
 * subqueries results. Prohibited subqueries are not evaluated here.
 * The result is sorted by document id.
 */
private function _calculateNonConjunctionResult()
{
    $requiredSets   = array();
    $requiredSizes  = array();
    $requiredOwners = array(); // extra sort column, prevents arrays comparison while sorting
    $optionalDocs   = array();

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $sign = $this->_signs[$subqueryId];

        if ($sign === false) {
            // Prohibited: do nothing. matchedDocs() may include non-matching id's.
            // Calculating prohibited vector may take significant time, but does not
            // affect the result.
            continue;
        }

        $docSet = $subquery->matchedDocs();

        if ($sign === true) {
            // required
            $requiredSets[]   = $docSet;
            $requiredSizes[]  = count($docSet);
            $requiredOwners[] = $subqueryId;
        } else {
            // neither required, nor prohibited: array union
            $optionalDocs += $docSet;
        }
    }

    // Process the smallest required sets first
    array_multisort($requiredSizes,  SORT_ASC, SORT_NUMERIC,
                    $requiredOwners, SORT_ASC, SORT_NUMERIC,
                    $requiredSets);

    $requiredDocs = null;
    foreach ($requiredSets as $docSet) {
        if ($requiredDocs === null) {
            $requiredDocs = $docSet;
        } else {
            // Hand-written key intersection; used instead of array_intersect_key()
            // as a workaround for its slowness.
            $common = array();
            foreach ($requiredDocs as $docId => $value) {
                if (isset($docSet[$docId])) {
                    $common[$docId] = $value;
                }
            }
            $requiredDocs = $common;
        }

        if (count($requiredDocs) == 0) {
            // Nothing left to intersect with, remaining subqueries can't add documents
            break;
        }
    }

    $result = ($requiredDocs !== null) ? $requiredDocs : $optionalDocs;
    ksort($result, SORT_NUMERIC);

    $this->_resVector = $result;
}
/**
 * Score calculator for conjunction queries (all subqueries are required)
 *
 * The score is the sum of the subqueries scores multiplied by the coord
 * factor and by the query boost. It is 0 as soon as any subquery doesn't
 * match the document. The coord factor is applied once to the sum, like
 * it is done for non conjunction queries.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    if ($this->_coord === null) {
        // All subqueries have to match, so overlap is equal to the subqueries number
        $subqueriesCount = count($this->_subqueries);
        $this->_coord    = $reader->getSimilarity()->coord($subqueriesCount, $subqueriesCount);
    }

    $score = 0;

    foreach ($this->_subqueries as $subquery) {
        // Each subquery is scored only once per document
        $subscore = $subquery->score($docId, $reader);

        if ($subscore == 0) {
            // A required subquery doesn't match
            return 0;
        }

        $score += $subscore;
    }

    return $score * $this->_coord * $this->getBoost();
}
/**
 * Score calculator for non conjunction queries (not all subqueries are required)
 *
 * The score is 0 if a prohibited subquery matches or a required one doesn't.
 * Otherwise it is the sum of matched subqueries scores multiplied by the coord
 * factor for the number of matched subqueries and by the query boost.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    if ($this->_coord === null) {
        // Precalculate coord factors for every possible number of matched subqueries
        $this->_coord = array();

        $notProhibited = 0;
        foreach ($this->_signs as $sign) {
            if ($sign !== false /* not prohibited */) {
                $notProhibited++;
            }
        }

        for ($matched = 0; $matched <= $notProhibited; $matched++) {
            $this->_coord[$matched] = $reader->getSimilarity()->coord($matched, $notProhibited);
        }
    }

    $total   = 0;
    $matched = 0;

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $subscore = $subquery->score($docId, $reader);
        $sign     = $this->_signs[$subqueryId];

        if ($subscore == 0) {
            if ($sign === true) {
                // is required, but doesn't match
                return 0;
            }
            continue;
        }

        if ($sign === false) {
            // Prohibited, but matches
            return 0;
        }

        $matched++;
        $total += $subscore;
    }

    return $total * $this->_coord[$matched] * $this->getBoost();
}
/**
 * Execute query in context of index reader
 * It also initializes necessary internal structures
 *
 * Every subquery is executed first: required subqueries receive the documents
 * filter, all other subqueries are executed without it. Then the result vector
 * is calculated from the subqueries results.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    // Initialize weight if it's not done yet
    $this->_initWeight($reader);

    if ($docsFilter === null) {
        // Create local documents filter if it's not provided by upper query
        $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
    }

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        // Strict check: only a null signs list means "all subqueries are required"
        // (the same test selects the result calculation below)
        if ($this->_signs === null || $this->_signs[$subqueryId] === true) {
            // Subquery is required
            $subquery->execute($reader, $docsFilter);
        } else {
            $subquery->execute($reader);
        }
    }

    if ($this->_signs === null) {
        $this->_calculateConjunctionResult();
    } else {
        $this->_calculateNonConjunctionResult();
    }
}
/**
* Get document ids likely matching the query
*
* Returns the result vector as it was calculated by execute().
* It's an array with document ids as keys (performance considerations)
*
* @return array
*/
public function matchedDocs()
{
return $this->_resVector;
}
/**
 * Score specified document
 *
 * Documents which are not in the result vector get a score of 0. Other
 * documents are scored by the conjunction or non conjunction calculator,
 * depending on whether subqueries signs are tracked.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    if (!isset($this->_resVector[$docId])) {
        return 0;
    }

    if ($this->_signs === null) {
        // all subqueries are required
        return $this->_conjunctionScore($docId, $reader);
    }

    return $this->_nonConjunctionScore($docId, $reader);
}
/**
 * Return query terms
 *
 * Terms of all subqueries are merged into one list, except for the terms
 * of prohibited subqueries.
 *
 * @return array
 */
public function getQueryTerms()
{
    $collected = array();

    foreach ($this->_subqueries as $id => $subquery) {
        if ($this->_signs !== null && $this->_signs[$id] === false) {
            // prohibited subquery, its terms are skipped
            continue;
        }

        $collected = array_merge($collected, $subquery->getQueryTerms());
    }

    return $collected;
}
/**
 * Highlight query terms
 *
 * Delegates highlighting to every subquery which is not prohibited.
 *
 * @param Zend_Search_Lucene_Document_Html $doc
 * @param integer &$colorIndex  Passed on by reference to the subqueries
 */
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
{
    foreach ($this->_subqueries as $id => $subquery) {
        if ($this->_signs !== null && $this->_signs[$id] === false) {
            // prohibited subquery, nothing to highlight
            continue;
        }

        $subquery->highlightMatchesDOM($doc, $colorIndex);
    }
}
/**
 * Print a query
 *
 * Every subquery is printed as `(subquery)`, prefixed with '+' if it's required
 * or '-' if it's prohibited, and followed by `^boost` if its boost differs from 1.
 * Clauses are separated by a single space.
 *
 * @return string
 */
public function __toString()
{
    // It's used only for query visualisation, so we don't care about characters escaping
    $clauses = array();

    foreach ($this->_subqueries as $id => $subquery) {
        $clause = '';

        if ($this->_signs === null || $this->_signs[$id] === true) {
            $clause .= '+';
        } else if ($this->_signs[$id] === false) {
            $clause .= '-';
        }

        $clause .= '(' . $subquery->__toString() . ')';

        if ($subquery->getBoost() != 1) {
            $clause .= '^' . round($subquery->getBoost(), 4);
        }

        $clauses[] = $clause;
    }

    // Joining the clauses doesn't depend on subqueries keys, which may not start
    // at 0 (e.g. for queries produced by optimize())
    return implode(' ', $clauses);
}
}
PK �H[o|��.&