Current File : /home/k/a/r/karenpetzb/www/items/category/Search.zip |
PK �HH[��� � QueryToken.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryToken
{
    /**
     * Token types.
     */
    const TT_WORD                 = 0;  // Word
    const TT_PHRASE               = 1;  // Phrase (one or several quoted words)
    const TT_FIELD                = 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
    const TT_FIELD_INDICATOR      = 3;  // ':'
    const TT_REQUIRED             = 4;  // '+'
    const TT_PROHIBITED           = 5;  // '-'
    const TT_FUZZY_PROX_MARK      = 6;  // '~'
    const TT_BOOSTING_MARK        = 7;  // '^'
    const TT_RANGE_INCL_START     = 8;  // '['
    const TT_RANGE_INCL_END       = 9;  // ']'
    const TT_RANGE_EXCL_START     = 10; // '{'
    const TT_RANGE_EXCL_END       = 11; // '}'
    const TT_SUBQUERY_START       = 12; // '('
    const TT_SUBQUERY_END         = 13; // ')'
    const TT_AND_LEXEME           = 14; // 'AND' or 'and'
    const TT_OR_LEXEME            = 15; // 'OR'  or 'or'
    const TT_NOT_LEXEME           = 16; // 'NOT' or 'not'
    const TT_TO_LEXEME            = 17; // 'TO'  or 'to'
    const TT_NUMBER               = 18; // Number, like: 10, 0.8, .64, ....

    /**
     * Returns all possible lexeme types.
     * It's used for syntax analyzer state machine initialization
     *
     * @return array
     */
    public static function getTypes()
    {
        return array(   self::TT_WORD,
                        self::TT_PHRASE,
                        self::TT_FIELD,
                        self::TT_FIELD_INDICATOR,
                        self::TT_REQUIRED,
                        self::TT_PROHIBITED,
                        self::TT_FUZZY_PROX_MARK,
                        self::TT_BOOSTING_MARK,
                        self::TT_RANGE_INCL_START,
                        self::TT_RANGE_INCL_END,
                        self::TT_RANGE_EXCL_START,
                        self::TT_RANGE_EXCL_END,
                        self::TT_SUBQUERY_START,
                        self::TT_SUBQUERY_END,
                        self::TT_AND_LEXEME,
                        self::TT_OR_LEXEME,
                        self::TT_NOT_LEXEME,
                        self::TT_TO_LEXEME,
                        self::TT_NUMBER
                     );
    }

    /**
     * Token categories.
     */
    const TC_WORD           = 0;  // Word
    const TC_PHRASE         = 1;  // Phrase (one or several quoted words)
    const TC_NUMBER         = 2;  // Numbers, which are used with syntax elements. Ex. roam~0.8
    const TC_SYNTAX_ELEMENT = 3;  // + - ( ) [ ] { } ! || && ~ ^

    /**
     * Token type (one of the TT_* constants).
     *
     * @var integer
     */
    public $type;

    /**
     * Token text.
     *
     * @var string
     */
    public $text;

    /**
     * Token position within query (1-based).
     *
     * @var integer
     */
    public $position;

    /**
     * Builds a token and derives its type from the token category and text.
     *
     * Words are checked case-insensitively against the reserved lexemes
     * 'and', 'or', 'not' and 'to'; syntax elements are mapped by their exact text.
     *
     * @param integer $tokenCategory  One of the TC_* constants
     * @param string  $tokenText      Token text
     * @param integer $position       Zero-based position; stored as position + 1
     * @throws Zend_Search_Lucene_Exception  On an unknown category or syntax element
     */
    public function __construct($tokenCategory, $tokenText, $position)
    {
        $this->text     = $tokenText;
        $this->position = $position + 1; // Start from 1

        switch ($tokenCategory) {
            case self::TC_WORD:
                // Lower-case once; reserved words are matched case-insensitively.
                switch (strtolower($tokenText)) {
                    case 'and':
                        $this->type = self::TT_AND_LEXEME;
                        break;
                    case 'or':
                        $this->type = self::TT_OR_LEXEME;
                        break;
                    case 'not':
                        $this->type = self::TT_NOT_LEXEME;
                        break;
                    case 'to':
                        $this->type = self::TT_TO_LEXEME;
                        break;
                    default:
                        $this->type = self::TT_WORD;
                }
                break;

            case self::TC_PHRASE:
                $this->type = self::TT_PHRASE;
                break;

            case self::TC_NUMBER:
                $this->type = self::TT_NUMBER;
                break;

            case self::TC_SYNTAX_ELEMENT:
                switch ($tokenText) {
                    case ':':
                        $this->type = self::TT_FIELD_INDICATOR;
                        break;
                    case '+':
                        $this->type = self::TT_REQUIRED;
                        break;
                    case '-':
                        $this->type = self::TT_PROHIBITED;
                        break;
                    case '~':
                        $this->type = self::TT_FUZZY_PROX_MARK;
                        break;
                    case '^':
                        $this->type = self::TT_BOOSTING_MARK;
                        break;
                    case '[':
                        $this->type = self::TT_RANGE_INCL_START;
                        break;
                    case ']':
                        $this->type = self::TT_RANGE_INCL_END;
                        break;
                    case '{':
                        $this->type = self::TT_RANGE_EXCL_START;
                        break;
                    case '}':
                        $this->type = self::TT_RANGE_EXCL_END;
                        break;
                    case '(':
                        $this->type = self::TT_SUBQUERY_START;
                        break;
                    case ')':
                        $this->type = self::TT_SUBQUERY_END;
                        break;
                    case '!':
                        $this->type = self::TT_NOT_LEXEME;
                        break;
                    case '&&':
                        $this->type = self::TT_AND_LEXEME;
                        break;
                    case '||':
                        $this->type = self::TT_OR_LEXEME;
                        break;
                    default:
                        throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
                }
                break;

            // A second, unreachable 'case self::TC_NUMBER' used to sit here without a
            // 'break'; it was removed because the first matching case above always wins.
            default:
                throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
        }
    }
}
PK �HH[�W2G� � QueryEntry.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryEntry_Term */
require_once 'Zend/Search/Lucene/Search/QueryEntry/Term.php';
/** Zend_Search_Lucene_Search_QueryEntry_Phrase */
require_once 'Zend/Search/Lucene/Search/QueryEntry/Phrase.php';
/** Zend_Search_Lucene_Search_QueryEntry_Subquery */
require_once 'Zend/Search/Lucene/Search/QueryEntry/Subquery.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Search_Lucene_Search_QueryEntry
{
    /**
     * Accumulated boost factor of this entry; starts at 1.0.
     *
     * @var float
     */
    protected $_boost = 1.0;

    /**
     * Apply the '~' modifier to this entry.
     *
     * @param mixed $parameter  Optional modifier parameter
     */
    abstract public function processFuzzyProximityModifier($parameter = null);

    /**
     * Convert this entry into a query object.
     *
     * @param string $encoding
     * @return Zend_Search_Lucene_Search_Query
     */
    abstract public function getQuery($encoding);

    /**
     * Multiply the entry's current boost by the given factor.
     *
     * @param float $boostFactor
     */
    public function boost($boostFactor)
    {
        $this->_boost = $this->_boost * $boostFactor;
    }
}
PK �HH[�d_o]
]
Similarity/Default.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Similarity */
require_once 'Zend/Search/Lucene/Search/Similarity.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Similarity_Default extends Zend_Search_Lucene_Search_Similarity
{
    /**
     * Length normalization: '1/sqrt(numTerms)'.
     *
     * @param string  $fieldName  Not used by this implementation
     * @param integer $numTerms
     * @return float
     */
    public function lengthNorm($fieldName, $numTerms)
    {
        // A field with no terms gets a fixed large norm instead of dividing by zero.
        return ($numTerms == 0) ? 1E10 : 1.0/sqrt($numTerms);
    }

    /**
     * Query normalization: '1/sqrt(sumOfSquaredWeights)'.
     *
     * @param float $sumOfSquaredWeights
     * @return float
     */
    public function queryNorm($sumOfSquaredWeights)
    {
        $root = sqrt($sumOfSquaredWeights);

        return 1.0/$root;
    }

    /**
     * Term frequency factor: 'sqrt(freq)'.
     *
     * @param float $freq
     * @return float
     */
    public function tf($freq)
    {
        $root = sqrt($freq);

        return $root;
    }

    /**
     * Sloppy phrase match factor: '1/(distance + 1)'.
     *
     * @param integer $distance
     * @return float
     */
    public function sloppyFreq($distance)
    {
        $denominator = $distance + 1;

        return 1.0/$denominator;
    }

    /**
     * Inverse document frequency: 'log(numDocs/(docFreq+1)) + 1'.
     *
     * @param integer $docFreq
     * @param integer $numDocs
     * @return float
     */
    public function idfFreq($docFreq, $numDocs)
    {
        $ratio = $numDocs/(float)($docFreq+1);

        return log($ratio) + 1.0;
    }

    /**
     * Coordination factor: 'overlap/maxOverlap'.
     *
     * @param integer $overlap
     * @param integer $maxOverlap
     * @return float
     */
    public function coord($overlap, $maxOverlap)
    {
        $limit = (float)$maxOverlap;

        return $overlap/$limit;
    }
}
PK �HH[k#�GW
W
QueryHit.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryHit
{
    /**
     * Index handle this hit came from (wrapped in a proxy by the constructor)
     * @var Zend_Search_Lucene_Interface
     */
    protected $_index = null;

    /**
     * Document for this hit; fetched on demand by getDocument()
     * @var Zend_Search_Lucene_Document
     */
    protected $_document = null;

    /**
     * Number of the document in the index
     * @var integer
     */
    public $id;

    /**
     * Score of the hit
     * @var float
     */
    public $score;

    /**
     * Constructor - wraps the index that produced the hit in a
     * Zend_Search_Lucene_Proxy so the document can later be retrieved from the hit.
     *
     * @param Zend_Search_Lucene_Interface $index
     */
    public function __construct(Zend_Search_Lucene_Interface $index)
    {
        $proxy = new Zend_Search_Lucene_Proxy($index);
        $this->_index = $proxy;
    }

    /**
     * Magic accessor: reads a field value from the document behind this hit.
     *
     * @param string $offset  Field name
     * @return string
     */
    public function __get($offset)
    {
        $document = $this->getDocument();

        return $document->getFieldValue($offset);
    }

    /**
     * Return the document object for this hit, loading it from the index
     * by $id when no document instance is held yet.
     *
     * @return Zend_Search_Lucene_Document
     */
    public function getDocument()
    {
        if ($this->_document instanceof Zend_Search_Lucene_Document) {
            return $this->_document;
        }

        $this->_document = $this->_index->getDocument($this->id);

        return $this->_document;
    }

    /**
     * Return the index object for this hit
     *
     * @return Zend_Search_Lucene_Interface
     */
    public function getIndex()
    {
        return $this->_index;
    }
}
PK �HH[Z@�� � Query.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Document_Html */
require_once 'Zend/Search/Lucene/Document/Html.php';
/** Zend_Search_Lucene_Index_DocsFilter */
require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Search_Lucene_Search_Query
{
    /**
     * Query boost factor
     *
     * @var float
     */
    private $_boost = 1;

    /**
     * Query weight
     *
     * @var Zend_Search_Lucene_Search_Weight
     */
    protected $_weight = null;

    /**
     * Current highlight color
     *
     * @var integer
     */
    private $_currentColorIndex = 0;

    /**
     * List of colors for text highlighting
     *
     * @var array
     */
    private $_highlightColors = array('#66ffff', '#ff66ff', '#ffff66',
                                      '#ff8888', '#88ff88', '#8888ff',
                                      '#88dddd', '#dd88dd', '#dddd88',
                                      '#aaddff', '#aaffdd', '#ddaaff', '#ddffaa', '#ffaadd', '#ffddaa');

    /**
     * Gets the boost for this clause.  Documents matching
     * this clause will (in addition to the normal weightings) have their score
     * multiplied by boost.   The boost is 1.0 by default.
     *
     * @return float
     */
    public function getBoost()
    {
        return $this->_boost;
    }

    /**
     * Sets the boost for this query clause to $boost.
     *
     * @param float $boost
     */
    public function setBoost($boost)
    {
        $this->_boost = $boost;
    }

    /**
     * Score specified document
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    abstract public function score($docId, Zend_Search_Lucene_Interface $reader);

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    abstract public function matchedDocs();

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * Query specific implementation
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     */
    abstract public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null);

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    abstract public function createWeight(Zend_Search_Lucene_Interface $reader);

    /**
     * Constructs and initializes a Weight for a _top-level_query_.
     *
     * If the weight is already set it is returned untouched. Otherwise
     * createWeight() is invoked, and the weight found in $this->_weight is
     * normalized with the query norm obtained from the reader's similarity.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    protected function _initWeight(Zend_Search_Lucene_Interface $reader)
    {
        // Check, that it's a top-level query and query weight is not initialized yet.
        if ($this->_weight !== null) {
            return $this->_weight;
        }

        // NOTE(review): this relies on createWeight() storing its result in
        // $this->_weight; the return value is not used here.
        $this->createWeight($reader);
        $sum = $this->_weight->sumOfSquaredWeights();
        $queryNorm = $reader->getSimilarity()->queryNorm($sum);
        $this->_weight->normalize($queryNorm);

        // Return the weight on this path too, so both paths behave alike.
        return $this->_weight;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    abstract public function rewrite(Zend_Search_Lucene_Interface $index);

    /**
     * Optimize query in the context of specified index
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    abstract public function optimize(Zend_Search_Lucene_Interface $index);

    /**
     * Reset query, so it can be reused within other queries or
     * with other indices
     */
    public function reset()
    {
        $this->_weight = null;
    }

    /**
     * Print a query
     *
     * @return string
     */
    abstract public function __toString();

    /**
     * Return query terms
     *
     * @return array
     */
    abstract public function getQueryTerms();

    /**
     * Get highlight color and shift to next
     *
     * The index is advanced by one and wraps around to 0 at the end of the color list.
     *
     * @param integer &$colorIndex
     * @return string
     */
    protected function _getHighlightColor(&$colorIndex)
    {
        $color = $this->_highlightColors[$colorIndex++];

        $colorIndex %= count($this->_highlightColors);

        return $color;
    }

    /**
     * Highlight query terms
     *
     * @param integer &$colorIndex
     * @param Zend_Search_Lucene_Document_Html $doc
     */
    abstract public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex);

    /**
     * Highlight matches in $inputHTML
     *
     * Loads the HTML into a document, runs highlightMatchesDOM() on it
     * starting from color index 0, and returns the resulting HTML.
     *
     * @param string $inputHTML
     * @return string
     */
    public function highlightMatches($inputHTML)
    {
        $doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML);

        $colorIndex = 0;
        $this->highlightMatchesDOM($doc, $colorIndex);

        return $doc->getHTML();
    }
}
PK �HH[�<��c �c QueryParser.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Search_Query_Term */
require_once 'Zend/Search/Lucene/Search/Query/Term.php';
/** Zend_Search_Lucene_Search_Query_MultiTerm */
require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
/** Zend_Search_Lucene_Search_Query_Boolean */
require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
/** Zend_Search_Lucene_Search_Query_Phrase */
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
/** Zend_Search_Lucene_Search_Query_Wildcard */
require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
/** Zend_Search_Lucene_Search_Query_Range */
require_once 'Zend/Search/Lucene/Search/Query/Range.php';
/** Zend_Search_Lucene_Search_Query_Fuzzy */
require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
/** Zend_Search_Lucene_Search_Query_Empty */
require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
/** Zend_Search_Lucene_Search_Query_Insignificant */
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
/** Zend_Search_Lucene_Search_QueryLexer */
require_once 'Zend/Search/Lucene/Search/QueryLexer.php';
/** Zend_Search_Lucene_Search_QueryParserContext */
require_once 'Zend/Search/Lucene/Search/QueryParserContext.php';
/** Zend_Search_Lucene_FSM */
require_once 'Zend/Search/Lucene/FSM.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
{
/**
* Parser instance
*
* @var Zend_Search_Lucene_Search_QueryParser
*/
private static $_instance = null;
/**
* Query lexer
*
* @var Zend_Search_Lucene_Search_QueryLexer
*/
private $_lexer;
/**
* Tokens list
* Array of Zend_Search_Lucene_Search_QueryToken objects
*
* @var array
*/
private $_tokens;
/**
* Current token
*
* @var Zend_Search_Lucene_Search_QueryToken
*/
private $_currentToken;
/**
* Last token
*
* It can be processed within FSM states, but this additional state simplifies FSM
*
* @var Zend_Search_Lucene_Search_QueryToken
*/
private $_lastToken = null;
/**
* Range query first term
*
* @var string
*/
private $_rqFirstTerm = null;
/**
* Current query parser context
*
* @var Zend_Search_Lucene_Search_QueryParserContext
*/
private $_context;
/**
* Context stack
*
* @var array
*/
private $_contextStack;
/**
* Query string encoding
*
* @var string
*/
private $_encoding;
/**
* Query string default encoding
*
* @var string
*/
private $_defaultEncoding = '';
/**
* Defines query parsing mode.
*
* If this option is turned on, then query parser suppress query parser exceptions
* and constructs multi-term query using all words from a query.
*
* That helps to avoid exceptions caused by queries, which don't conform to query language,
* but limits possibilities to check, that query entered by user has some inconsistencies.
*
*
* Default is true.
*
* Use {@link Zend_Search_Lucene_Search_QueryParser::suppressQueryParsingExceptions()},
* {@link Zend_Search_Lucene_Search_QueryParser::dontSuppressQueryParsingExceptions()} and
* {@link Zend_Search_Lucene_Search_QueryParser::queryParsingExceptionsSuppressed()} to operate
* with this setting.
*
* @var boolean
*/
private $_suppressQueryParsingExceptions = true;
/**
* Boolean operators constants
*/
const B_OR = 0;
const B_AND = 1;
/**
* Default boolean queries operator
*
* @var integer
*/
private $_defaultOperator = self::B_OR;
/** Query parser State Machine states */
const ST_COMMON_QUERY_ELEMENT = 0; // Terms, phrases, operators
const ST_CLOSEDINT_RQ_START = 1; // Range query start (closed interval) - '['
const ST_CLOSEDINT_RQ_FIRST_TERM = 2; // First term in '[term1 to term2]' construction
const ST_CLOSEDINT_RQ_TO_TERM = 3; // 'TO' lexeme in '[term1 to term2]' construction
const ST_CLOSEDINT_RQ_LAST_TERM = 4; // Second term in '[term1 to term2]' construction
const ST_CLOSEDINT_RQ_END = 5; // Range query end (closed interval) - ']'
const ST_OPENEDINT_RQ_START = 6; // Range query start (opened interval) - '{'
const ST_OPENEDINT_RQ_FIRST_TERM = 7; // First term in '{term1 to term2}' construction
const ST_OPENEDINT_RQ_TO_TERM = 8; // 'TO' lexeme in '{term1 to term2}' construction
const ST_OPENEDINT_RQ_LAST_TERM = 9; // Second term in '{term1 to term2}' construction
const ST_OPENEDINT_RQ_END = 10; // Range query end (opened interval) - '}'
/**
* Parser constructor
*/
public function __construct()
{
    $common = self::ST_COMMON_QUERY_ELEMENT;

    // Register every parser state and every lexeme type with the parent FSM.
    $states = array(
        self::ST_COMMON_QUERY_ELEMENT,
        self::ST_CLOSEDINT_RQ_START,
        self::ST_CLOSEDINT_RQ_FIRST_TERM,
        self::ST_CLOSEDINT_RQ_TO_TERM,
        self::ST_CLOSEDINT_RQ_LAST_TERM,
        self::ST_CLOSEDINT_RQ_END,
        self::ST_OPENEDINT_RQ_START,
        self::ST_OPENEDINT_RQ_FIRST_TERM,
        self::ST_OPENEDINT_RQ_TO_TERM,
        self::ST_OPENEDINT_RQ_LAST_TERM,
        self::ST_OPENEDINT_RQ_END
    );
    parent::__construct($states, Zend_Search_Lucene_Search_QueryToken::getTypes());

    // Transitions leaving the common state: token type => target state.
    // Only '[' and '{' leave the common state; everything else loops back to it.
    $commonTransitions = array(
        Zend_Search_Lucene_Search_QueryToken::TT_WORD             => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_PHRASE           => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_FIELD            => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED         => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED       => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK  => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK    => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_START => self::ST_CLOSEDINT_RQ_START,
        Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_START => self::ST_OPENEDINT_RQ_START,
        Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START   => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END     => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME       => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME        => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME       => $common,
        Zend_Search_Lucene_Search_QueryToken::TT_NUMBER           => $common
    );
    $commonRules = array();
    foreach ($commonTransitions as $tokenType => $targetState) {
        $commonRules[] = array($common, $tokenType, $targetState);
    }
    $this->addRules($commonRules);

    // Range queries: '[term1 TO term2]' first, then '{term1 TO term2}'.
    // Each form is (start, first term, TO, last term, closing token).
    $rangeForms = array(
        array(self::ST_CLOSEDINT_RQ_START,
              self::ST_CLOSEDINT_RQ_FIRST_TERM,
              self::ST_CLOSEDINT_RQ_TO_TERM,
              self::ST_CLOSEDINT_RQ_LAST_TERM,
              Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_END),
        array(self::ST_OPENEDINT_RQ_START,
              self::ST_OPENEDINT_RQ_FIRST_TERM,
              self::ST_OPENEDINT_RQ_TO_TERM,
              self::ST_OPENEDINT_RQ_LAST_TERM,
              Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_END)
    );
    foreach ($rangeForms as $form) {
        list($startState, $firstTermState, $toState, $lastTermState, $closingToken) = $form;

        $this->addRules(array(
            array($startState,     Zend_Search_Lucene_Search_QueryToken::TT_WORD,      $firstTermState),
            array($firstTermState, Zend_Search_Lucene_Search_QueryToken::TT_TO_LEXEME, $toState),
            array($toState,        Zend_Search_Lucene_Search_QueryToken::TT_WORD,      $lastTermState),
            array($lastTermState,  $closingToken,                                      $common)
        ));
    }

    // One FSM action object per handler method, created up front; handlers used
    // for several tokens share the same action object.
    $handlerNames = array(
        'addTermEntry',
        'addPhraseEntry',
        'setField',
        'setSign',
        'processFuzzyProximityModifier',
        'processModifierParameter',
        'subqueryStart',
        'subqueryEnd',
        'logicalOperator',
        'openedRQFirstTerm',
        'openedRQLastTerm',
        'closedRQFirstTerm',
        'closedRQLastTerm'
    );
    $actions = array();
    foreach ($handlerNames as $handlerName) {
        $actions[$handlerName] = new Zend_Search_Lucene_FSMAction($this, $handlerName);
    }

    // Input actions fired in the common state: token type => handler method.
    $inputHandlers = array(
        Zend_Search_Lucene_Search_QueryToken::TT_WORD            => 'addTermEntry',
        Zend_Search_Lucene_Search_QueryToken::TT_PHRASE          => 'addPhraseEntry',
        Zend_Search_Lucene_Search_QueryToken::TT_FIELD           => 'setField',
        Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED        => 'setSign',
        Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED      => 'setSign',
        Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK => 'processFuzzyProximityModifier',
        Zend_Search_Lucene_Search_QueryToken::TT_NUMBER          => 'processModifierParameter',
        Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START  => 'subqueryStart',
        Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END    => 'subqueryEnd',
        Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME      => 'logicalOperator',
        Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME       => 'logicalOperator',
        Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME      => 'logicalOperator'
    );
    foreach ($inputHandlers as $tokenType => $handlerName) {
        $this->addInputAction($common, $tokenType, $actions[$handlerName]);
    }

    // Entry actions capture the range query terms when their states are entered.
    $entryHandlers = array(
        self::ST_OPENEDINT_RQ_FIRST_TERM => 'openedRQFirstTerm',
        self::ST_OPENEDINT_RQ_LAST_TERM  => 'openedRQLastTerm',
        self::ST_CLOSEDINT_RQ_FIRST_TERM => 'closedRQFirstTerm',
        self::ST_CLOSEDINT_RQ_LAST_TERM  => 'closedRQLastTerm'
    );
    foreach ($entryHandlers as $state => $handlerName) {
        $this->addEntryAction($state, $actions[$handlerName]);
    }

    $this->_lexer = new Zend_Search_Lucene_Search_QueryLexer();
}
/**
* Get query parser instance
*
* @return Zend_Search_Lucene_Search_QueryParser
*/
private static function _getInstance()
{
    // Reuse the shared parser once it has been built.
    if (self::$_instance !== null) {
        return self::$_instance;
    }

    // First request: build the parser and keep it for later calls.
    self::$_instance = new self();

    return self::$_instance;
}
/**
* Set query string default encoding
*
* @param string $encoding
*/
public static function setDefaultEncoding($encoding)
{
    // The setting is stored on the shared parser instance.
    $parser = self::_getInstance();
    $parser->_defaultEncoding = $encoding;
}
/**
* Get query string default encoding
*
* @return string
*/
public static function getDefaultEncoding()
{
    // Read the setting from the shared parser instance.
    $parser = self::_getInstance();

    return $parser->_defaultEncoding;
}
/**
* Set default boolean operator
*
* @param integer $operator
*/
public static function setDefaultOperator($operator)
{
    // Expected to be self::B_OR or self::B_AND; stored on the shared parser instance.
    $parser = self::_getInstance();
    $parser->_defaultOperator = $operator;
}
/**
* Get default boolean operator
*
* @return integer
*/
public static function getDefaultOperator()
{
    // Read the setting from the shared parser instance.
    $parser = self::_getInstance();

    return $parser->_defaultOperator;
}
/**
* Turn on 'suppress query parser exceptions' mode.
*/
public static function suppressQueryParsingExceptions()
{
    // From now on parse() falls back to a multi-term query on parser exceptions.
    $parser = self::_getInstance();
    $parser->_suppressQueryParsingExceptions = true;
}
/**
* Turn off 'suppress query parser exceptions' mode.
*/
public static function dontSuppressQueryParsingExceptions()
{
    // From now on parse() re-throws query parser exceptions to the caller.
    $parser = self::_getInstance();
    $parser->_suppressQueryParsingExceptions = false;
}
/**
* Check 'suppress query parser exceptions' mode.
* @return boolean
*/
public static function queryParsingExceptionsSuppressed()
{
    // Report the current mode held by the shared parser instance.
    $parser = self::_getInstance();

    return $parser->_suppressQueryParsingExceptions;
}
/**
* Parses a query string
*
* @param string $strQuery
* @param string $encoding
* @return Zend_Search_Lucene_Search_Query
* @throws Zend_Search_Lucene_Search_QueryParserException
*/
public static function parse($strQuery, $encoding = null)
{
    $parser = self::_getInstance();

    // Reset FSM if previous parse operation didn't return it into a correct state
    $parser->reset();

    try {
        // An explicit encoding wins; otherwise use the configured default.
        if ($encoding !== null) {
            $parser->_encoding = $encoding;
        } else {
            $parser->_encoding = $parser->_defaultEncoding;
        }
        $parser->_lastToken    = null;
        $parser->_context      = new Zend_Search_Lucene_Search_QueryParserContext($parser->_encoding);
        $parser->_contextStack = array();
        $parser->_tokens       = $parser->_lexer->tokenize($strQuery, $parser->_encoding);

        // Empty query
        if (count($parser->_tokens) == 0) {
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        // Feed the tokens to the state machine one by one.
        foreach ($parser->_tokens as $token) {
            try {
                $parser->_currentToken = $token;
                $parser->process($token->type);
                $parser->_lastToken = $token;
            } catch (Exception $e) {
                // Anything other than a missing FSM rule is passed through unchanged.
                if (strpos($e->getMessage(), 'There is no any rule for') === false) {
                    throw $e;
                }

                // A missing rule means the token is not allowed here: report a syntax error.
                throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.' );
            }
        }

        // Contexts still on the stack mean some '(' was never closed.
        if (count($parser->_contextStack) != 0) {
            throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
        }

        return $parser->_context->getQuery();
    } catch (Zend_Search_Lucene_Search_QueryParserException $e) {
        if (!$parser->_suppressQueryParsingExceptions) {
            throw $e;
        }

        // Suppress mode: run the raw query string through the default analyzer
        // and build a multi-term query from every resulting token.
        $queryTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($strQuery, $parser->_encoding);

        $query = new Zend_Search_Lucene_Search_Query_MultiTerm();

        if ($parser->_defaultOperator == self::B_AND) {
            $termsSign = true; // required term
        } else {
            $termsSign = null; // optional term
        }

        foreach ($queryTokens as $analyzedToken) {
            $query->addTerm(new Zend_Search_Lucene_Index_Term($analyzedToken->getTermText()), $termsSign);
        }

        return $query;
    }
}
/*********************************************************************
* Actions implementation
*
* Actions affect on recognized lexemes list
*********************************************************************/
/**
 * Append a term entry built from the current token to the active context.
 *
 * The entry is created from the current token's text and the field
 * reported by the active context.
 */
public function addTermEntry()
{
    $termText = $this->_currentToken->text;
    $field    = $this->_context->getField();

    $this->_context->addEntry(
        new Zend_Search_Lucene_Search_QueryEntry_Term($termText, $field)
    );
}
/**
 * Append a phrase entry built from the current token to the active context.
 *
 * The entry is created from the current token's text and the field
 * reported by the active context.
 */
public function addPhraseEntry()
{
    $phraseText = $this->_currentToken->text;
    $field      = $this->_context->getField();

    $this->_context->addEntry(
        new Zend_Search_Lucene_Search_QueryEntry_Phrase($phraseText, $field)
    );
}
/**
 * Hand the current token's text to the active context as the field
 * of the next entry.
 */
public function setField()
{
    $fieldName = $this->_currentToken->text;
    $this->_context->setNextEntryField($fieldName);
}
/**
 * Hand the current token's type to the active context as the sign
 * of the next entry.
 */
public function setSign()
{
    $signTokenType = $this->_currentToken->type;
    $this->_context->setNextEntrySign($signTokenType);
}
/**
 * Handle the fuzzy search / proximity modifier ('~').
 *
 * Delegates to the active context without passing a parameter.
 */
public function processFuzzyProximityModifier()
{
    $activeContext = $this->_context;
    $activeContext->processFuzzyProximityModifier();
}
/**
 * Apply the current token as the parameter of the preceding modifier.
 *
 * The previous token decides the meaning of the parameter: after a
 * fuzzy/proximity mark it is forwarded to the context's
 * processFuzzyProximityModifier(), after a boosting mark to boost().
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException if there is no previous token
 * @throws Zend_Search_Lucene_Exception if the previous token is not a modifier mark
 */
public function processModifierParameter()
{
    $previousToken = $this->_lastToken;

    if ($previousToken === null) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
    }

    $previousType = $previousToken->type;

    if ($previousType == Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK) {
        $this->_context->processFuzzyProximityModifier($this->_currentToken->text);
        return;
    }

    if ($previousType == Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK) {
        $this->_context->boost($this->_currentToken->text);
        return;
    }

    // It's not a user input exception
    throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
}
/**
 * Open a subquery.
 *
 * The active context is saved on the context stack and replaced by a
 * new context created with the parser encoding and the saved context's field.
 */
public function subqueryStart()
{
    $enclosingContext      = $this->_context;
    $this->_contextStack[] = $enclosingContext;

    $this->_context = new Zend_Search_Lucene_Search_QueryParserContext(
        $this->_encoding,
        $enclosingContext->getField()
    );
}
/**
 * Close a subquery.
 *
 * The query of the active context is wrapped into a subquery entry and
 * added to the context restored from the context stack.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException if the context stack is empty
 */
public function subqueryEnd()
{
    $hasOpenSubquery = (count($this->_contextStack) != 0);
    if (!$hasOpenSubquery) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing. Char position ' . $this->_currentToken->position . '.' );
    }

    $subquery = $this->_context->getQuery();

    // Return to the enclosing context and attach the finished subquery to it
    $this->_context = array_pop($this->_contextStack);
    $subqueryEntry  = new Zend_Search_Lucene_Search_QueryEntry_Subquery($subquery);
    $this->_context->addEntry($subqueryEntry);
}
/**
 * Register the current token's type with the active context as a
 * logical operator.
 */
public function logicalOperator()
{
    $operatorType = $this->_currentToken->type;
    $this->_context->addLogicalOperator($operatorType);
}
/**
 * Remember the current token's text as the first boundary of a range
 * query (opened interval); it is consumed by openedRQLastTerm().
 */
public function openedRQFirstTerm()
{
    $firstBoundary      = $this->_currentToken->text;
    $this->_rqFirstTerm = $firstBoundary;
}
/**
 * Finish a range query (opened interval).
 *
 * Both the remembered first boundary and the current token's text are run
 * through the default analyzer. Each boundary must yield at most one token;
 * a boundary yielding no tokens becomes null. The resulting range query is
 * created with `false` as its third argument and added to the active
 * context as a subquery entry.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function openedRQLastTerm()
{
    // First boundary
    $firstTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_rqFirstTerm, $this->_encoding);
    $firstCount  = count($firstTokens);
    if ($firstCount > 1) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
    }
    $from = null;
    if ($firstCount == 1) {
        $from = new Zend_Search_Lucene_Index_Term(reset($firstTokens)->getTermText(), $this->_context->getField());
    }

    // Last boundary
    $lastTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_currentToken->text, $this->_encoding);
    $lastCount  = count($lastTokens);
    if ($lastCount > 1) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
    }
    $to = null;
    if ($lastCount == 1) {
        $to = new Zend_Search_Lucene_Index_Term(reset($lastTokens)->getTermText(), $this->_context->getField());
    }

    if (!($from !== null || $to !== null)) {
        throw new Zend_Search_Lucene_Search_QueryParserException('At least one range query boundary term must be non-empty term');
    }

    $this->_context->addEntry(
        new Zend_Search_Lucene_Search_QueryEntry_Subquery(
            new Zend_Search_Lucene_Search_Query_Range($from, $to, false)
        )
    );
}
/**
 * Remember the current token's text as the first boundary of a range
 * query (closed interval); it is consumed by closedRQLastTerm().
 */
public function closedRQFirstTerm()
{
    $firstBoundary      = $this->_currentToken->text;
    $this->_rqFirstTerm = $firstBoundary;
}
/**
 * Finish a range query (closed interval).
 *
 * Both the remembered first boundary and the current token's text are run
 * through the default analyzer. Each boundary must yield at most one token;
 * a boundary yielding no tokens becomes null. The resulting range query is
 * created with `true` as its third argument and added to the active
 * context as a subquery entry.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function closedRQLastTerm()
{
    // First boundary
    $firstTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_rqFirstTerm, $this->_encoding);
    $firstCount  = count($firstTokens);
    if ($firstCount > 1) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
    }
    $from = null;
    if ($firstCount == 1) {
        $from = new Zend_Search_Lucene_Index_Term(reset($firstTokens)->getTermText(), $this->_context->getField());
    }

    // Last boundary
    $lastTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_currentToken->text, $this->_encoding);
    $lastCount  = count($lastTokens);
    if ($lastCount > 1) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
    }
    $to = null;
    if ($lastCount == 1) {
        $to = new Zend_Search_Lucene_Index_Term(reset($lastTokens)->getTermText(), $this->_context->getField());
    }

    if (!($from !== null || $to !== null)) {
        throw new Zend_Search_Lucene_Search_QueryParserException('At least one range query boundary term must be non-empty term');
    }

    $this->_context->addEntry(
        new Zend_Search_Lucene_Search_QueryEntry_Subquery(
            new Zend_Search_Lucene_Search_Query_Range($from, $to, true)
        )
    );
}
}
PK �HH[���Y� � QueryParserException.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Zend_Search_Lucene base exception
*/
require_once 'Zend/Search/Lucene/Exception.php';
/**
 * Exception type dedicated to query parsing problems.
 *
 * Catching this class separately from the base Zend_Search_Lucene_Exception
 * allows callers to intercept wrong user input.
 *
 * @category Zend
 * @package Zend_Search_Lucene
 * @subpackage Search
 * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license http://framework.zend.com/license/new-bsd New BSD License
 */
class Zend_Search_Lucene_Search_QueryParserException extends Zend_Search_Lucene_Exception
{
}
PK �HH[/��qb qb QueryLexer.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_FSM */
require_once 'Zend/Search/Lucene/FSM.php';
/** Zend_Search_Lucene_Search_QueryParser */
require_once 'Zend/Search/Lucene/Search/QueryToken.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
{
/** State Machine states */
const ST_WHITE_SPACE = 0;
const ST_SYNT_LEXEME = 1;
const ST_LEXEME = 2;
const ST_QUOTED_LEXEME = 3;
const ST_ESCAPED_CHAR = 4;
const ST_ESCAPED_QCHAR = 5;
const ST_LEXEME_MODIFIER = 6;
const ST_NUMBER = 7;
const ST_MANTISSA = 8;
const ST_ERROR = 9;
/** Input symbols */
const IN_WHITE_SPACE = 0;
const IN_SYNT_CHAR = 1;
const IN_LEXEME_MODIFIER = 2;
const IN_ESCAPE_CHAR = 3;
const IN_QUOTE = 4;
const IN_DECIMAL_POINT = 5;
const IN_ASCII_DIGIT = 6;
const IN_CHAR = 7;
const IN_MUTABLE_CHAR = 8;
const QUERY_WHITE_SPACE_CHARS = " \n\r\t";
const QUERY_SYNT_CHARS = ':()[]{}!|&';
const QUERY_MUTABLE_CHARS = '+-';
const QUERY_DOUBLECHARLEXEME_CHARS = '|&';
const QUERY_LEXEMEMODIFIER_CHARS = '~^';
const QUERY_ASCIIDIGITS_CHARS = '0123456789';
/**
* List of recognized lexemes
*
* @var array
*/
private $_lexemes;
/**
* Query string (array of single- or non single-byte characters)
*
* @var array
*/
private $_queryString;
/**
* Current position within a query string
* Used to create appropriate error messages
*
* @var integer
*/
private $_queryStringPosition;
/**
* Recognized part of current lexeme
*
* @var string
*/
private $_currentLexeme;
/**
* Build the lexer state machine.
*
* Passes the list of states and the list of input symbols to the parent
* constructor, then installs the transition rules (one addRules() call per
* source state) and finally attaches the actions that collect characters
* and emit lexemes.
*
* Rules are given as array(source state, input symbol, target state) with an
* optional fourth element holding an action object for that transition.
*/
public function __construct()
{
parent::__construct( array(self::ST_WHITE_SPACE,
self::ST_SYNT_LEXEME,
self::ST_LEXEME,
self::ST_QUOTED_LEXEME,
self::ST_ESCAPED_CHAR,
self::ST_ESCAPED_QCHAR,
self::ST_LEXEME_MODIFIER,
self::ST_NUMBER,
self::ST_MANTISSA,
self::ST_ERROR),
array(self::IN_WHITE_SPACE,
self::IN_SYNT_CHAR,
self::IN_MUTABLE_CHAR,
self::IN_LEXEME_MODIFIER,
self::IN_ESCAPE_CHAR,
self::IN_QUOTE,
self::IN_DECIMAL_POINT,
self::IN_ASCII_DIGIT,
self::IN_CHAR));
// Error actions: each one is bound to a method of this object that throws
// a Zend_Search_Lucene_Search_QueryParserException
$lexemeModifierErrorAction = new Zend_Search_Lucene_FSMAction($this, 'lexModifierErrException');
$quoteWithinLexemeErrorAction = new Zend_Search_Lucene_FSMAction($this, 'quoteWithinLexemeErrException');
$wrongNumberErrorAction = new Zend_Search_Lucene_FSMAction($this, 'wrongNumberErrException');
// Transitions from ST_WHITE_SPACE
$this->addRules(array( array(self::ST_WHITE_SPACE, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
array(self::ST_WHITE_SPACE, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
array(self::ST_WHITE_SPACE, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
array(self::ST_WHITE_SPACE, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
array(self::ST_WHITE_SPACE, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_CHAR),
array(self::ST_WHITE_SPACE, self::IN_QUOTE, self::ST_QUOTED_LEXEME),
array(self::ST_WHITE_SPACE, self::IN_DECIMAL_POINT, self::ST_LEXEME),
array(self::ST_WHITE_SPACE, self::IN_ASCII_DIGIT, self::ST_LEXEME),
array(self::ST_WHITE_SPACE, self::IN_CHAR, self::ST_LEXEME)
));
// Transitions from ST_SYNT_LEXEME (same targets as from ST_WHITE_SPACE)
$this->addRules(array( array(self::ST_SYNT_LEXEME, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
array(self::ST_SYNT_LEXEME, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
array(self::ST_SYNT_LEXEME, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
array(self::ST_SYNT_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
array(self::ST_SYNT_LEXEME, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_CHAR),
array(self::ST_SYNT_LEXEME, self::IN_QUOTE, self::ST_QUOTED_LEXEME),
array(self::ST_SYNT_LEXEME, self::IN_DECIMAL_POINT, self::ST_LEXEME),
array(self::ST_SYNT_LEXEME, self::IN_ASCII_DIGIT, self::ST_LEXEME),
array(self::ST_SYNT_LEXEME, self::IN_CHAR, self::ST_LEXEME)
));
// Transitions from ST_LEXEME: a mutable char ('+' or '-') inside a lexeme
// keeps the machine in ST_LEXEME instead of starting a syntax lexeme
$this->addRules(array( array(self::ST_LEXEME, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
array(self::ST_LEXEME, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
array(self::ST_LEXEME, self::IN_MUTABLE_CHAR, self::ST_LEXEME),
array(self::ST_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
array(self::ST_LEXEME, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_CHAR),
// IN_QUOTE not allowed
array(self::ST_LEXEME, self::IN_QUOTE, self::ST_ERROR, $quoteWithinLexemeErrorAction),
array(self::ST_LEXEME, self::IN_DECIMAL_POINT, self::ST_LEXEME),
array(self::ST_LEXEME, self::IN_ASCII_DIGIT, self::ST_LEXEME),
array(self::ST_LEXEME, self::IN_CHAR, self::ST_LEXEME)
));
// Transitions from ST_QUOTED_LEXEME: every input except the escape char and
// the closing quote stays inside the quoted lexeme
$this->addRules(array( array(self::ST_QUOTED_LEXEME, self::IN_WHITE_SPACE, self::ST_QUOTED_LEXEME),
array(self::ST_QUOTED_LEXEME, self::IN_SYNT_CHAR, self::ST_QUOTED_LEXEME),
array(self::ST_QUOTED_LEXEME, self::IN_MUTABLE_CHAR, self::ST_QUOTED_LEXEME),
array(self::ST_QUOTED_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
array(self::ST_QUOTED_LEXEME, self::IN_ESCAPE_CHAR, self::ST_ESCAPED_QCHAR),
array(self::ST_QUOTED_LEXEME, self::IN_QUOTE, self::ST_WHITE_SPACE),
array(self::ST_QUOTED_LEXEME, self::IN_DECIMAL_POINT, self::ST_QUOTED_LEXEME),
array(self::ST_QUOTED_LEXEME, self::IN_ASCII_DIGIT, self::ST_QUOTED_LEXEME),
array(self::ST_QUOTED_LEXEME, self::IN_CHAR, self::ST_QUOTED_LEXEME)
));
// Transitions from ST_ESCAPED_CHAR: any input leads to ST_LEXEME
$this->addRules(array( array(self::ST_ESCAPED_CHAR, self::IN_WHITE_SPACE, self::ST_LEXEME),
array(self::ST_ESCAPED_CHAR, self::IN_SYNT_CHAR, self::ST_LEXEME),
array(self::ST_ESCAPED_CHAR, self::IN_MUTABLE_CHAR, self::ST_LEXEME),
array(self::ST_ESCAPED_CHAR, self::IN_LEXEME_MODIFIER, self::ST_LEXEME),
array(self::ST_ESCAPED_CHAR, self::IN_ESCAPE_CHAR, self::ST_LEXEME),
array(self::ST_ESCAPED_CHAR, self::IN_QUOTE, self::ST_LEXEME),
array(self::ST_ESCAPED_CHAR, self::IN_DECIMAL_POINT, self::ST_LEXEME),
array(self::ST_ESCAPED_CHAR, self::IN_ASCII_DIGIT, self::ST_LEXEME),
array(self::ST_ESCAPED_CHAR, self::IN_CHAR, self::ST_LEXEME)
));
// Transitions from ST_ESCAPED_QCHAR: any input leads back to ST_QUOTED_LEXEME
$this->addRules(array( array(self::ST_ESCAPED_QCHAR, self::IN_WHITE_SPACE, self::ST_QUOTED_LEXEME),
array(self::ST_ESCAPED_QCHAR, self::IN_SYNT_CHAR, self::ST_QUOTED_LEXEME),
array(self::ST_ESCAPED_QCHAR, self::IN_MUTABLE_CHAR, self::ST_QUOTED_LEXEME),
array(self::ST_ESCAPED_QCHAR, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
array(self::ST_ESCAPED_QCHAR, self::IN_ESCAPE_CHAR, self::ST_QUOTED_LEXEME),
array(self::ST_ESCAPED_QCHAR, self::IN_QUOTE, self::ST_QUOTED_LEXEME),
array(self::ST_ESCAPED_QCHAR, self::IN_DECIMAL_POINT, self::ST_QUOTED_LEXEME),
array(self::ST_ESCAPED_QCHAR, self::IN_ASCII_DIGIT, self::ST_QUOTED_LEXEME),
array(self::ST_ESCAPED_QCHAR, self::IN_CHAR, self::ST_QUOTED_LEXEME)
));
// Transitions from ST_LEXEME_MODIFIER: a digit or decimal point starts a number
$this->addRules(array( array(self::ST_LEXEME_MODIFIER, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
array(self::ST_LEXEME_MODIFIER, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
array(self::ST_LEXEME_MODIFIER, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
array(self::ST_LEXEME_MODIFIER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
// IN_ESCAPE_CHAR not allowed
array(self::ST_LEXEME_MODIFIER, self::IN_ESCAPE_CHAR, self::ST_ERROR, $lexemeModifierErrorAction),
// IN_QUOTE not allowed
array(self::ST_LEXEME_MODIFIER, self::IN_QUOTE, self::ST_ERROR, $lexemeModifierErrorAction),
array(self::ST_LEXEME_MODIFIER, self::IN_DECIMAL_POINT, self::ST_MANTISSA),
array(self::ST_LEXEME_MODIFIER, self::IN_ASCII_DIGIT, self::ST_NUMBER),
// IN_CHAR not allowed
array(self::ST_LEXEME_MODIFIER, self::IN_CHAR, self::ST_ERROR, $lexemeModifierErrorAction),
));
// Transitions from ST_NUMBER (digits seen, no decimal point yet)
$this->addRules(array( array(self::ST_NUMBER, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
array(self::ST_NUMBER, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
array(self::ST_NUMBER, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
array(self::ST_NUMBER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
// IN_ESCAPE_CHAR not allowed
array(self::ST_NUMBER, self::IN_ESCAPE_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
// IN_QUOTE not allowed
array(self::ST_NUMBER, self::IN_QUOTE, self::ST_ERROR, $wrongNumberErrorAction),
array(self::ST_NUMBER, self::IN_DECIMAL_POINT, self::ST_MANTISSA),
array(self::ST_NUMBER, self::IN_ASCII_DIGIT, self::ST_NUMBER),
// IN_CHAR not allowed
array(self::ST_NUMBER, self::IN_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
));
// Transitions from ST_MANTISSA (after the decimal point)
$this->addRules(array( array(self::ST_MANTISSA, self::IN_WHITE_SPACE, self::ST_WHITE_SPACE),
array(self::ST_MANTISSA, self::IN_SYNT_CHAR, self::ST_SYNT_LEXEME),
array(self::ST_MANTISSA, self::IN_MUTABLE_CHAR, self::ST_SYNT_LEXEME),
array(self::ST_MANTISSA, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
// IN_ESCAPE_CHAR not allowed
array(self::ST_MANTISSA, self::IN_ESCAPE_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
// IN_QUOTE not allowed
array(self::ST_MANTISSA, self::IN_QUOTE, self::ST_ERROR, $wrongNumberErrorAction),
// IN_DECIMAL_POINT not allowed
array(self::ST_MANTISSA, self::IN_DECIMAL_POINT, self::ST_ERROR, $wrongNumberErrorAction),
array(self::ST_MANTISSA, self::IN_ASCII_DIGIT, self::ST_MANTISSA),
// IN_CHAR not allowed
array(self::ST_MANTISSA, self::IN_CHAR, self::ST_ERROR, $wrongNumberErrorAction),
));
/** Actions */
$syntaxLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuerySyntaxLexeme');
$lexemeModifierAction = new Zend_Search_Lucene_FSMAction($this, 'addLexemeModifier');
$addLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addLexeme');
$addQuotedLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuotedLexeme');
$addNumberLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addNumberLexeme');
$addLexemeCharAction = new Zend_Search_Lucene_FSMAction($this, 'addLexemeChar');
/** Syntax lexeme */
$this->addEntryAction(self::ST_SYNT_LEXEME, $syntaxLexemeAction);
// Two lexemes in succession
$this->addTransitionAction(self::ST_SYNT_LEXEME, self::ST_SYNT_LEXEME, $syntaxLexemeAction);
/** Lexeme */
$this->addEntryAction(self::ST_LEXEME, $addLexemeCharAction);
$this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME, $addLexemeCharAction);
// ST_ESCAPED_CHAR => ST_LEXEME transition is covered by ST_LEXEME entry action
$this->addTransitionAction(self::ST_LEXEME, self::ST_WHITE_SPACE, $addLexemeAction);
$this->addTransitionAction(self::ST_LEXEME, self::ST_SYNT_LEXEME, $addLexemeAction);
// NOTE(review): none of the ST_LEXEME rules above targets ST_QUOTED_LEXEME,
// ST_NUMBER or ST_MANTISSA, so the three registrations for those targets
// look unreachable - confirm before relying on them
$this->addTransitionAction(self::ST_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeAction);
$this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME_MODIFIER, $addLexemeAction);
$this->addTransitionAction(self::ST_LEXEME, self::ST_NUMBER, $addLexemeAction);
$this->addTransitionAction(self::ST_LEXEME, self::ST_MANTISSA, $addLexemeAction);
/** Quoted lexeme */
// We don't need entry action (skip quote)
$this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
$this->addTransitionAction(self::ST_ESCAPED_QCHAR, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
// Closing quote changes state to the ST_WHITE_SPACE other states are not used
$this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_WHITE_SPACE, $addQuotedLexemeAction);
/** Lexeme modifier */
$this->addEntryAction(self::ST_LEXEME_MODIFIER, $lexemeModifierAction);
/** Number */
$this->addEntryAction(self::ST_NUMBER, $addLexemeCharAction);
$this->addEntryAction(self::ST_MANTISSA, $addLexemeCharAction);
$this->addTransitionAction(self::ST_NUMBER, self::ST_NUMBER, $addLexemeCharAction);
// ST_NUMBER => ST_MANTISSA transition is covered by ST_MANTISSA entry action
$this->addTransitionAction(self::ST_MANTISSA, self::ST_MANTISSA, $addLexemeCharAction);
$this->addTransitionAction(self::ST_NUMBER, self::ST_WHITE_SPACE, $addNumberLexemeAction);
$this->addTransitionAction(self::ST_NUMBER, self::ST_SYNT_LEXEME, $addNumberLexemeAction);
$this->addTransitionAction(self::ST_NUMBER, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
$this->addTransitionAction(self::ST_MANTISSA, self::ST_WHITE_SPACE, $addNumberLexemeAction);
$this->addTransitionAction(self::ST_MANTISSA, self::ST_SYNT_LEXEME, $addNumberLexemeAction);
$this->addTransitionAction(self::ST_MANTISSA, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
}
/**
 * Classify a query character as one of the state machine input symbols.
 *
 * The character sets are tested in a fixed order (white space, syntax,
 * mutable, lexeme modifier, ASCII digit), followed by the single-character
 * symbols (quote, decimal point, escape char). Anything else is IN_CHAR.
 *
 * @param string $char
 * @return integer
 */
private function _translateInput($char)
{
    // Input symbol => characters belonging to it, in the order they are tested
    $charClasses = array(
        self::IN_WHITE_SPACE     => self::QUERY_WHITE_SPACE_CHARS,
        self::IN_SYNT_CHAR       => self::QUERY_SYNT_CHARS,
        self::IN_MUTABLE_CHAR    => self::QUERY_MUTABLE_CHARS,
        self::IN_LEXEME_MODIFIER => self::QUERY_LEXEMEMODIFIER_CHARS,
        self::IN_ASCII_DIGIT     => self::QUERY_ASCIIDIGITS_CHARS,
    );

    foreach ($charClasses as $inputSymbol => $members) {
        if (strpos($members, $char) !== false) {
            return $inputSymbol;
        }
    }

    if ($char === '"') {
        return self::IN_QUOTE;
    }
    if ($char === '.') {
        return self::IN_DECIMAL_POINT;
    }
    if ($char === '\\') {
        return self::IN_ESCAPE_CHAR;
    }

    return self::IN_CHAR;
}
/**
 * Split a query string into lexemes.
 *
 * The string is cut into single characters using iconv with the given
 * encoding, each character is translated into an input symbol and fed to
 * the state machine, and a final white space symbol is fed to flush the
 * last lexeme.
 *
 * @param string $inputString
 * @param string $encoding
 * @return array list of recognized Zend_Search_Lucene_Search_QueryToken objects
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function tokenize($inputString, $encoding)
{
    $this->reset();
    $this->_lexemes = array();
    $this->_queryString = array();
    // A previous call that ended with an exception in the middle of a lexeme
    // (error action or "Unexpected end of query") leaves its partial text in
    // _currentLexeme. Clear it, otherwise it would be prepended to the first
    // lexeme of this query when the same lexer object is reused.
    $this->_currentLexeme = '';
    if (PHP_OS == 'AIX' && $encoding == '') {
        $encoding = 'ISO8859-1';
    }
    $strLength = iconv_strlen($inputString, $encoding);
    // Workaround for iconv_substr bug
    $inputString .= ' ';
    for ($count = 0; $count < $strLength; $count++) {
        $this->_queryString[$count] = iconv_substr($inputString, $count, 1, $encoding);
    }
    // The position is a property because addQuerySyntaxLexeme() advances it
    // itself when it consumes the second char of a two-char lexeme
    for ($this->_queryStringPosition = 0;
         $this->_queryStringPosition < count($this->_queryString);
         $this->_queryStringPosition++) {
        $this->process($this->_translateInput($this->_queryString[$this->_queryStringPosition]));
    }
    // Terminating white space: closes a lexeme or number that is still open
    $this->process(self::IN_WHITE_SPACE);
    if ($this->getState() != self::ST_WHITE_SPACE) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
    }
    $this->_queryString = null;
    return $this->_lexemes;
}
/*********************************************************************
* Actions implementation
*
* Actions operate on the list of recognized lexemes
*********************************************************************/
/**
 * Emit a query syntax lexeme for the character at the current position.
 *
 * Characters listed in QUERY_DOUBLECHARLEXEME_CHARS must be immediately
 * repeated; the second character is consumed here by advancing the current
 * position. A field indicator token is not stored itself: instead the
 * previously stored token (which must be a word) is re-typed as a field.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function addQuerySyntaxLexeme()
{
    $text = $this->_queryString[$this->_queryStringPosition];

    $needsDoubling = (strpos(self::QUERY_DOUBLECHARLEXEME_CHARS, $text) !== false);
    if ($needsDoubling) {
        // Step onto the expected second character
        $this->_queryStringPosition++;

        if ($this->_queryStringPosition == count($this->_queryString) ||
            $this->_queryString[$this->_queryStringPosition] != $text) {
            throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
        }

        // Lexeme text is the character repeated twice
        $text = $text . $text;
    }

    $syntaxToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
        $text,
        $this->_queryStringPosition
    );

    if ($syntaxToken->type != Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
        $this->_lexemes[] = $syntaxToken;
        return;
    }

    // ':' is dropped; the token before it becomes a 'field' instead of a 'word'
    $fieldToken = array_pop($this->_lexemes);
    if ($fieldToken === null || $fieldToken->type != Zend_Search_Lucene_Search_QueryToken::TT_WORD) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
    }
    $fieldToken->type = Zend_Search_Lucene_Search_QueryToken::TT_FIELD;

    $this->_lexemes[] = $fieldToken;
}
/**
 * Emit a syntax-element token for the lexeme modifier character found
 * at the current position.
 */
public function addLexemeModifier()
{
    $position     = $this->_queryStringPosition;
    $modifierChar = $this->_queryString[$position];

    $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
        $modifierChar,
        $position
    );
}
/**
 * Emit the collected lexeme as a word token and start a new, empty lexeme.
 *
 * The token is positioned at the character preceding the current one.
 */
public function addLexeme()
{
    $wordToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_WORD,
        $this->_currentLexeme,
        $this->_queryStringPosition - 1
    );

    $this->_lexemes[]     = $wordToken;
    $this->_currentLexeme = '';
}
/**
 * Emit the collected lexeme as a phrase token and start a new, empty lexeme.
 *
 * The token is positioned at the current character.
 */
public function addQuotedLexeme()
{
    $phraseToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_PHRASE,
        $this->_currentLexeme,
        $this->_queryStringPosition
    );

    $this->_lexemes[]     = $phraseToken;
    $this->_currentLexeme = '';
}
/**
 * Emit the collected lexeme as a number token and start a new, empty lexeme.
 *
 * The token is positioned at the character preceding the current one.
 */
public function addNumberLexeme()
{
    $numberToken = new Zend_Search_Lucene_Search_QueryToken(
        Zend_Search_Lucene_Search_QueryToken::TC_NUMBER,
        $this->_currentLexeme,
        $this->_queryStringPosition - 1
    );

    $this->_lexemes[]     = $numberToken;
    $this->_currentLexeme = '';
}
/**
 * Append the character at the current position to the lexeme being collected.
 */
public function addLexemeChar()
{
    $currentChar = $this->_queryString[$this->_queryStringPosition];
    $this->_currentLexeme .= $currentChar;
}
/**
 * Build the "Position is N." suffix used by lexer error messages,
 * where N is the current position within the query string.
 *
 * @return string
 */
private function _positionMsg()
{
    return sprintf('Position is %s.', $this->_queryStringPosition);
}
/*********************************************************************
* Syntax errors actions
*********************************************************************/
/**
 * Error action: a lexeme modifier is followed by an input that is not allowed.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException always
 */
public function lexModifierErrException()
{
    $message = 'Lexeme modifier character can be followed only by number, white space or query syntax element. '
             . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
/**
 * Error action: an unescaped quote was found inside a lexeme.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException always
 */
public function quoteWithinLexemeErrException()
{
    $message = 'Quote within lexeme must be escaped by \'\\\' char. '
             . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
/**
 * Error action: a number (modifier parameter) contains an input that is
 * not allowed.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException always
 */
public function wrongNumberErrException()
{
    // Trailing space keeps the position text separated from the sentence,
    // matching the other lexer error messages
    throw new Zend_Search_Lucene_Search_QueryParserException('Wrong number syntax. ' . $this->_positionMsg());
}
}
PK �HH[Md�]) )
Weight.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Calculate query weights and build query scorers.
*
* A Weight is constructed by a query Query->createWeight().
* The sumOfSquaredWeights() method is then called on the top-level
* query to compute the query normalization factor Similarity->queryNorm(float).
* This factor is then passed to normalize(float). At this point the weighting
* is complete.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Search_Lucene_Search_Weight
{
    /**
     * Query normalization factor.
     * Kept only for query explanation purposes; not used anywhere else.
     *
     * @var float
     */
    protected $_queryNorm;

    /**
     * Weight value.
     *
     * May be initialized by sumOfSquaredWeights() or by normalize(): both are
     * invoked either from Query::_initWeight (for a top-level query) or from
     * the corresponding methods of the parent query's weights.
     *
     * @var float
     */
    protected $_value;

    /**
     * Compute the sum of squared weights of the contained query clauses.
     *
     * @return float
     */
    abstract public function sumOfSquaredWeights();

    /**
     * Assign the query normalization factor to this weight.
     *
     * @param float $norm
     */
    abstract public function normalize($norm);

    /**
     * Return the weight for this query.
     *
     * @return float
     */
    public function getValue()
    {
        return $this->_value;
    }
}
PK �HH[G�(<�` �` Similarity.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Similarity_Default */
require_once 'Zend/Search/Lucene/Search/Similarity/Default.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Search_Lucene_Search_Similarity
{
/**
* The Similarity implementation used by default.
*
* @var Zend_Search_Lucene_Search_Similarity
*/
private static $_defaultImpl;
/**
* Cache of decoded bytes.
* Array of floats
*
* @var array
*/
private static $_normTable = array( 0 => 0.0,
1 => 5.820766E-10,
2 => 6.9849193E-10,
3 => 8.1490725E-10,
4 => 9.313226E-10,
5 => 1.1641532E-9,
6 => 1.3969839E-9,
7 => 1.6298145E-9,
8 => 1.8626451E-9,
9 => 2.3283064E-9,
10 => 2.7939677E-9,
11 => 3.259629E-9,
12 => 3.7252903E-9,
13 => 4.656613E-9,
14 => 5.5879354E-9,
15 => 6.519258E-9,
16 => 7.4505806E-9,
17 => 9.313226E-9,
18 => 1.1175871E-8,
19 => 1.3038516E-8,
20 => 1.4901161E-8,
21 => 1.8626451E-8,
22 => 2.2351742E-8,
23 => 2.6077032E-8,
24 => 2.9802322E-8,
25 => 3.7252903E-8,
26 => 4.4703484E-8,
27 => 5.2154064E-8,
28 => 5.9604645E-8,
29 => 7.4505806E-8,
30 => 8.940697E-8,
31 => 1.0430813E-7,
32 => 1.1920929E-7,
33 => 1.4901161E-7,
34 => 1.7881393E-7,
35 => 2.0861626E-7,
36 => 2.3841858E-7,
37 => 2.9802322E-7,
38 => 3.5762787E-7,
39 => 4.172325E-7,
40 => 4.7683716E-7,
41 => 5.9604645E-7,
42 => 7.1525574E-7,
43 => 8.34465E-7,
44 => 9.536743E-7,
45 => 1.1920929E-6,
46 => 1.4305115E-6,
47 => 1.66893E-6,
48 => 1.9073486E-6,
49 => 2.3841858E-6,
50 => 2.861023E-6,
51 => 3.33786E-6,
52 => 3.8146973E-6,
53 => 4.7683716E-6,
54 => 5.722046E-6,
55 => 6.67572E-6,
56 => 7.6293945E-6,
57 => 9.536743E-6,
58 => 1.1444092E-5,
59 => 1.335144E-5,
60 => 1.5258789E-5,
61 => 1.9073486E-5,
62 => 2.2888184E-5,
63 => 2.670288E-5,
64 => 3.0517578E-5,
65 => 3.8146973E-5,
66 => 4.5776367E-5,
67 => 5.340576E-5,
68 => 6.1035156E-5,
69 => 7.6293945E-5,
70 => 9.1552734E-5,
71 => 1.0681152E-4,
72 => 1.2207031E-4,
73 => 1.5258789E-4,
74 => 1.8310547E-4,
75 => 2.1362305E-4,
76 => 2.4414062E-4,
77 => 3.0517578E-4,
78 => 3.6621094E-4,
79 => 4.272461E-4,
80 => 4.8828125E-4,
81 => 6.1035156E-4,
82 => 7.324219E-4,
83 => 8.544922E-4,
84 => 9.765625E-4,
85 => 0.0012207031,
86 => 0.0014648438,
87 => 0.0017089844,
88 => 0.001953125,
89 => 0.0024414062,
90 => 0.0029296875,
91 => 0.0034179688,
92 => 0.00390625,
93 => 0.0048828125,
94 => 0.005859375,
95 => 0.0068359375,
96 => 0.0078125,
97 => 0.009765625,
98 => 0.01171875,
99 => 0.013671875,
100 => 0.015625,
101 => 0.01953125,
102 => 0.0234375,
103 => 0.02734375,
104 => 0.03125,
105 => 0.0390625,
106 => 0.046875,
107 => 0.0546875,
108 => 0.0625,
109 => 0.078125,
110 => 0.09375,
111 => 0.109375,
112 => 0.125,
113 => 0.15625,
114 => 0.1875,
115 => 0.21875,
116 => 0.25,
117 => 0.3125,
118 => 0.375,
119 => 0.4375,
120 => 0.5,
121 => 0.625,
122 => 0.75,
123 => 0.875,
124 => 1.0,
125 => 1.25,
126 => 1.5,
127 => 1.75,
128 => 2.0,
129 => 2.5,
130 => 3.0,
131 => 3.5,
132 => 4.0,
133 => 5.0,
134 => 6.0,
135 => 7.0,
136 => 8.0,
137 => 10.0,
138 => 12.0,
139 => 14.0,
140 => 16.0,
141 => 20.0,
142 => 24.0,
143 => 28.0,
144 => 32.0,
145 => 40.0,
146 => 48.0,
147 => 56.0,
148 => 64.0,
149 => 80.0,
150 => 96.0,
151 => 112.0,
152 => 128.0,
153 => 160.0,
154 => 192.0,
155 => 224.0,
156 => 256.0,
157 => 320.0,
158 => 384.0,
159 => 448.0,
160 => 512.0,
161 => 640.0,
162 => 768.0,
163 => 896.0,
164 => 1024.0,
165 => 1280.0,
166 => 1536.0,
167 => 1792.0,
168 => 2048.0,
169 => 2560.0,
170 => 3072.0,
171 => 3584.0,
172 => 4096.0,
173 => 5120.0,
174 => 6144.0,
175 => 7168.0,
176 => 8192.0,
177 => 10240.0,
178 => 12288.0,
179 => 14336.0,
180 => 16384.0,
181 => 20480.0,
182 => 24576.0,
183 => 28672.0,
184 => 32768.0,
185 => 40960.0,
186 => 49152.0,
187 => 57344.0,
188 => 65536.0,
189 => 81920.0,
190 => 98304.0,
191 => 114688.0,
192 => 131072.0,
193 => 163840.0,
194 => 196608.0,
195 => 229376.0,
196 => 262144.0,
197 => 327680.0,
198 => 393216.0,
199 => 458752.0,
200 => 524288.0,
201 => 655360.0,
202 => 786432.0,
203 => 917504.0,
204 => 1048576.0,
205 => 1310720.0,
206 => 1572864.0,
207 => 1835008.0,
208 => 2097152.0,
209 => 2621440.0,
210 => 3145728.0,
211 => 3670016.0,
212 => 4194304.0,
213 => 5242880.0,
214 => 6291456.0,
215 => 7340032.0,
216 => 8388608.0,
217 => 1.048576E7,
218 => 1.2582912E7,
219 => 1.4680064E7,
220 => 1.6777216E7,
221 => 2.097152E7,
222 => 2.5165824E7,
223 => 2.9360128E7,
224 => 3.3554432E7,
225 => 4.194304E7,
226 => 5.0331648E7,
227 => 5.8720256E7,
228 => 6.7108864E7,
229 => 8.388608E7,
230 => 1.00663296E8,
231 => 1.17440512E8,
232 => 1.34217728E8,
233 => 1.6777216E8,
234 => 2.01326592E8,
235 => 2.34881024E8,
236 => 2.68435456E8,
237 => 3.3554432E8,
238 => 4.02653184E8,
239 => 4.69762048E8,
240 => 5.3687091E8,
241 => 6.7108864E8,
242 => 8.0530637E8,
243 => 9.395241E8,
244 => 1.07374182E9,
245 => 1.34217728E9,
246 => 1.61061274E9,
247 => 1.87904819E9,
248 => 2.14748365E9,
249 => 2.68435456E9,
250 => 3.22122547E9,
251 => 3.75809638E9,
252 => 4.2949673E9,
253 => 5.3687091E9,
254 => 6.4424509E9,
255 => 7.5161928E9 );
/**
* Set the default Similarity implementation used by indexing and search
* code.
*
* @param Zend_Search_Lucene_Search_Similarity $similarity
*/
public static function setDefault(Zend_Search_Lucene_Search_Similarity $similarity)
{
// Stored in the class-wide static slot that getDefault() reads back.
self::$_defaultImpl = $similarity;
}
/**
* Return the default Similarity implementation used by indexing and search
* code.
*
* @return Zend_Search_Lucene_Search_Similarity
*/
public static function getDefault()
{
    // Reuse the implementation that has already been configured, if any.
    if (self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) {
        return self::$_defaultImpl;
    }

    // Nothing usable is stored yet: create the stock implementation lazily
    // and remember it for subsequent calls.
    self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();

    return self::$_defaultImpl;
}
/**
* Computes the normalization value for a field given the total number of
* terms contained in a field. These values, together with field boosts, are
* stored in an index and multiplied into scores for hits on each field by the
* search code.
*
* Matches in longer fields are less precise, so implementations of this
* method usually return smaller values when 'numTokens' is large,
* and larger values when 'numTokens' is small.
*
* Note that these values are computed under
* IndexWriter::addDocument(Document) and stored then using
* encodeNorm(float). Thus they have limited precision, and documents
* must be re-indexed if this method is altered.
*
* fieldName - name of field
* numTokens - the total number of tokens contained in fields named
* 'fieldName' of 'doc'.
* Returns a normalization factor for hits on this field of this document
*
* @param string $fieldName
* @param integer $numTokens
* @return float
*/
abstract public function lengthNorm($fieldName, $numTokens);
/**
* Computes the normalization value for a query given the sum of the squared
* weights of each of the query terms. This value is then multiplied into the
* weight of each query term.
*
* This does not affect ranking, but rather just attempts to make scores
* from different queries comparable.
*
* sumOfSquaredWeights - the sum of the squares of query term weights
* Returns a normalization factor for query weights
*
* @param float $sumOfSquaredWeights
* @return float
*/
abstract public function queryNorm($sumOfSquaredWeights);
/**
* Decodes a normalization factor stored in an index.
*
* @param integer $byte
* @return float
*/
public static function decodeNorm($byte)
{
    // Only the low eight bits of the argument select a norm table entry.
    $tableIndex = $byte & 0xFF;

    return self::$_normTable[$tableIndex];
}
/**
* Encodes a normalization factor for storage in an index.
*
* The encoding uses a five-bit exponent and three-bit mantissa, thus
* representing values from around 7x10^9 to 2x10^-9 with about one
* significant decimal digit of accuracy. Zero is also represented.
* Negative numbers are rounded up to zero. Values too large to represent
* are rounded down to the largest representable value. Positive values too
* small to represent are rounded up to the smallest positive representable
* value.
*
* @param float $f
* @return integer
*/
public static function encodeNorm($f)
{
    // Visibility is now spelled out like on every sibling method; PHP already
    // treated the unmarked method as public, so callers are unaffected.
    // The actual table lookup is delegated to the private helper.
    return self::_floatToByte($f);
}
/**
 * Float to byte conversion
 *
 * Looks up the norm table index whose value is closest to $f, using a
 * binary search over the 256 table entries.
 *
 * Non-positive input yields 0. Positive input that is too small to be
 * represented is rounded up to index 1 rather than down to index 0, as
 * promised by the encodeNorm() documentation.
 *
 * @param float $f
 * @return integer  norm table index (0..255)
 */
private static function _floatToByte($f)
{
    // round negatives up to zero
    if ($f <= 0.0) {
        return 0;
    }

    // binary search for an exact match
    $lowIndex  = 0;
    $highIndex = 255;
    while ($highIndex >= $lowIndex) {
        $mid   = ($highIndex + $lowIndex) >> 1;
        $delta = $f - self::$_normTable[$mid];

        if ($delta < 0) {
            $highIndex = $mid - 1;
        } elseif ($delta > 0) {
            $lowIndex  = $mid + 1;
        } else {
            return $mid; // We got it!
        }
    }

    // No exact match: $f lies between entries $highIndex and $highIndex + 1
    // (or above the last entry). Round to the closest of the two.
    if ($highIndex != 255 &&
        $f - self::$_normTable[$highIndex] > self::$_normTable[$highIndex + 1] - $f) {
        $index = $highIndex + 1;
    } else {
        $index = $highIndex;
    }

    // $f is strictly positive here. Index 0 is what the guard above returns
    // for zero/negative input (assumes table entry 0 is 0.0 - TODO confirm),
    // so a positive value must not collapse onto it: round underflow up to
    // the smallest positive representable value instead.
    if ($index == 0) {
        return 1;
    }

    return $index;
}
/**
* Computes a score factor based on a term or phrase's frequency in a
* document. This value is multiplied by the idf(Term, Searcher)
* factor for each term in the query and these products are then summed to
* form the initial score for a document.
*
* Terms and phrases repeated in a document indicate the topic of the
* document, so implementations of this method usually return larger values
* when 'freq' is large, and smaller values when 'freq'
* is small.
*
* freq - the frequency of a term within a document
* Returns a score factor based on a term's within-document frequency
*
* @param float $freq
* @return float
*/
abstract public function tf($freq);
/**
* Computes the amount of a sloppy phrase match, based on an edit distance.
* This value is summed for each sloppy phrase match in a document to form
* the frequency that is passed to tf(float).
*
* A phrase match with a small edit distance to a document passage more
* closely matches the document, so implementations of this method usually
* return larger values when the edit distance is small and smaller values
* when it is large.
*
* distance - the edit distance of this sloppy phrase match
* Returns the frequency increment for this match
*
* @param integer $distance
* @return float
*/
abstract public function sloppyFreq($distance);
/**
* Computes a score factor for a simple term or a phrase.
*
* The default implementation is:
* return idfFreq(searcher.docFreq(term), searcher.maxDoc());
*
* input - the term in question or array of terms
* reader - reader the document collection being searched
* Returns a score factor for the term
*
* @param mixed $input
* @param Zend_Search_Lucene_Interface $reader
* @return float a score factor for the term
*/
public function idf($input, Zend_Search_Lucene_Interface $reader)
{
    // Single term: its idf factor is returned directly.
    if (!is_array($input)) {
        return $this->idfFreq($reader->docFreq($input), $reader->count());
    }

    // Several terms: the result is the sum of the individual factors.
    $total = 0.0;
    foreach ($input as $term) {
        $total += $this->idfFreq($reader->docFreq($term), $reader->count());
    }

    return $total;
}
/**
* Computes a score factor based on a term's document frequency (the number
* of documents which contain the term). This value is multiplied by the
* tf(int) factor for each term in the query and these products are
* then summed to form the initial score for a document.
*
* Terms that occur in fewer documents are better indicators of topic, so
* implementations of this method usually return larger values for rare terms,
* and smaller values for common terms.
*
* docFreq - the number of documents which contain the term
* numDocs - the total number of documents in the collection
* Returns a score factor based on the term's document frequency
*
* @param integer $docFreq
* @param integer $numDocs
* @return float
*/
abstract public function idfFreq($docFreq, $numDocs);
/**
* Computes a score factor based on the fraction of all query terms that a
* document contains. This value is multiplied into scores.
*
* The presence of a large portion of the query terms indicates a better
* match with the query, so implementations of this method usually return
* larger values when the ratio between these parameters is large and smaller
* values when the ratio between them is small.
*
* overlap - the number of query terms matched in the document
* maxOverlap - the total number of terms in the query
* Returns a score factor based on term overlap with the query
*
* @param integer $overlap
* @param integer $maxOverlap
* @return float
*/
abstract public function coord($overlap, $maxOverlap);
}
PK �HH[�`�iQ Q QueryEntry/Subquery.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryEntry */
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryEntry_Subquery extends Zend_Search_Lucene_Search_QueryEntry
{
    /**
     * Query wrapped by this entry
     *
     * @var Zend_Search_Lucene_Search_Query
     */
    private $_query;

    /**
     * Object constructor
     *
     * @param Zend_Search_Lucene_Search_Query $query
     */
    public function __construct(Zend_Search_Lucene_Search_Query $query)
    {
        $this->_query = $query;
    }

    /**
     * Process modifier ('~')
     *
     * This entry type always rejects the modifier.
     *
     * @param mixed $parameter
     * @throws Zend_Search_Lucene_Search_QueryParserException
     */
    public function processFuzzyProximityModifier($parameter = null)
    {
        $message = '\'~\' sign must follow term or phrase';

        throw new Zend_Search_Lucene_Search_QueryParserException($message);
    }

    /**
     * Transform entry to a subquery
     *
     * Applies the entry boost to the wrapped query and hands it back.
     *
     * @param string $encoding  not used by this entry type
     * @return Zend_Search_Lucene_Search_Query
     */
    public function getQuery($encoding)
    {
        $subquery = $this->_query;
        $subquery->setBoost($this->_boost);

        return $subquery;
    }
}
PK �HH[%�Z� � QueryEntry/Term.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryEntry */
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/** Zend_Search_Lucene_Analysis_Analyzer */
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryEntry_Term extends Zend_Search_Lucene_Search_QueryEntry
{
    /**
     * Term value
     *
     * @var string
     */
    private $_term;

    /**
     * Field
     *
     * @var string|null
     */
    private $_field;

    /**
     * Fuzzy search query flag (set by the '~' modifier)
     *
     * @var boolean
     */
    private $_fuzzyQuery = false;

    /**
     * Similarity passed to the fuzzy query
     *
     * @var float
     */
    private $_similarity = 1.;

    /**
     * Object constructor
     *
     * @param string $term
     * @param string $field
     */
    public function __construct($term, $field)
    {
        $this->_term  = $term;
        $this->_field = $field;
    }

    /**
     * Process modifier ('~')
     *
     * Marks the entry as fuzzy. The similarity is the given parameter, or
     * the fuzzy query default when no parameter is supplied.
     *
     * @param mixed $parameter
     */
    public function processFuzzyProximityModifier($parameter = null)
    {
        $this->_fuzzyQuery = true;

        $this->_similarity = ($parameter !== null)
                           ? $parameter
                           : Zend_Search_Lucene_Search_Query_Fuzzy::DEFAULT_MIN_SIMILARITY;
    }

    /**
     * Transform entry to a subquery
     *
     * Depending on the term text this produces a wildcard, insignificant,
     * term, fuzzy or multi-term query.
     *
     * @param string $encoding
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Search_QueryParserException
     */
    public function getQuery($encoding)
    {
        $hasWildcards = strpos($this->_term, '?') !== false
                     || strpos($this->_term, '*') !== false;

        if ($hasWildcards) {
            if ($this->_fuzzyQuery) {
                throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is not supported for terms with wildcards.');
            }

            // Rebuild the pattern piece by piece: the literal text between
            // wildcards goes through the analyzer, the wildcard characters
            // themselves are re-inserted unchanged.
            $pattern = '';
            foreach (explode('*', $this->_term) as $asteriskIndex => $asteriskChunk) {
                if ($asteriskIndex > 0) {
                    $pattern .= '*';
                }

                foreach (explode('?', $asteriskChunk) as $qMarkIndex => $literalChunk) {
                    if ($qMarkIndex > 0) {
                        $pattern .= '?';
                    }

                    $chunkTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($literalChunk, $encoding);
                    if (count($chunkTokens) > 1) {
                        throw new Zend_Search_Lucene_Search_QueryParserException('Wildcard search is supported only for non-multiple word terms');
                    }

                    foreach ($chunkTokens as $chunkToken) {
                        $pattern .= $chunkToken->getTermText();
                    }
                }
            }

            $wildcardQuery = new Zend_Search_Lucene_Search_Query_Wildcard(
                new Zend_Search_Lucene_Index_Term($pattern, $this->_field)
            );
            $wildcardQuery->setBoost($this->_boost);

            return $wildcardQuery;
        }

        $tokens     = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_term, $encoding);
        $tokenCount = count($tokens);

        // Analyzer produced nothing searchable
        if ($tokenCount == 0) {
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        // Exactly one token: plain term query or fuzzy query
        if ($tokenCount == 1) {
            $term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);

            if ($this->_fuzzyQuery) {
                $query = new Zend_Search_Lucene_Search_Query_Fuzzy($term, $this->_similarity);
            } else {
                $query = new Zend_Search_Lucene_Search_Query_Term($term);
            }
            $query->setBoost($this->_boost);

            return $query;
        }

        // Several tokens from here on
        if ($this->_fuzzyQuery) {
            throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is supported only for non-multiple word terms');
        }

        /**
         * @todo Process $token->getPositionIncrement() to support stemming, synonyms and other
         * analyzer design features
         */
        $multiTermQuery = new Zend_Search_Lucene_Search_Query_MultiTerm();
        foreach ($tokens as $token) {
            // every sub-term is added as required
            $multiTermQuery->addTerm(
                new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field),
                true
            );
        }
        $multiTermQuery->setBoost($this->_boost);

        return $multiTermQuery;
    }
}
PK �HH[~�� � QueryEntry/Phrase.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryEntry */
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/** Zend_Search_Lucene_Analysis_Analyzer */
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryEntry_Phrase extends Zend_Search_Lucene_Search_QueryEntry
{
    /**
     * Phrase value
     *
     * @var string
     */
    private $_phrase;

    /**
     * Field
     *
     * @var string|null
     */
    private $_field;

    /**
     * Proximity phrase query flag (set by the '~' modifier)
     *
     * @var boolean
     */
    private $_proximityQuery = false;

    /**
     * Words distance, used for proximity queries
     *
     * @var integer
     */
    private $_wordsDistance = 0;

    /**
     * Object constructor
     *
     * @param string $phrase
     * @param string $field
     */
    public function __construct($phrase, $field)
    {
        $this->_phrase = $phrase;
        $this->_field  = $field;
    }

    /**
     * Process modifier ('~')
     *
     * Marks the entry as a proximity query; a non-null parameter becomes
     * the words distance.
     *
     * @param mixed $parameter
     */
    public function processFuzzyProximityModifier($parameter = null)
    {
        $this->_proximityQuery = true;

        if ($parameter === null) {
            return;
        }

        $this->_wordsDistance = $parameter;
    }

    /**
     * Transform entry to a subquery
     *
     * Produces an insignificant, term or phrase query depending on how many
     * tokens the default analyzer extracts from the phrase.
     *
     * @param string $encoding
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Search_QueryParserException
     */
    public function getQuery($encoding)
    {
        $containsWildcard = strpos($this->_phrase, '?') !== false
                         || strpos($this->_phrase, '*') !== false;
        if ($containsWildcard) {
            throw new Zend_Search_Lucene_Search_QueryParserException('Wildcards are only allowed in a single terms.');
        }

        $tokens     = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $encoding);
        $tokenCount = count($tokens);

        // Nothing searchable in the phrase
        if ($tokenCount == 0) {
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        // A one-word phrase is just a term query
        if ($tokenCount == 1) {
            $termQuery = new Zend_Search_Lucene_Search_Query_Term(
                new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field)
            );
            $termQuery->setBoost($this->_boost);

            return $termQuery;
        }

        // Real phrase: place every term at its accumulated position
        $phraseQuery = new Zend_Search_Lucene_Search_Query_Phrase();
        $position    = -1;
        foreach ($tokens as $token) {
            $position += $token->getPositionIncrement();
            $phraseQuery->addTerm(
                new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field),
                $position
            );
        }

        if ($this->_proximityQuery) {
            $phraseQuery->setSlop($this->_wordsDistance);
        }
        $phraseQuery->setBoost($this->_boost);

        return $phraseQuery;
    }
}
PK �HH[
��2 �2 QueryParserContext.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_FSM */
require_once 'Zend/Search/Lucene/FSM.php';
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Search_QueryToken */
require_once 'Zend/Search/Lucene/Search/QueryToken.php';
/** Zend_Search_Lucene_Search_Query_Term */
require_once 'Zend/Search/Lucene/Search/Query/Term.php';
/** Zend_Search_Lucene_Search_Query_MultiTerm */
require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
/** Zend_Search_Lucene_Search_Query_Boolean */
require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
/** Zend_Search_Lucene_Search_Query_Phrase */
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/** Zend_Search_Lucene_Search_BooleanExpressionRecognizer */
require_once 'Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
/** Zend_Search_Lucene_Search_QueryEntry */
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryParserContext
{
/**
* Default field for the context.
*
* null means, that term should be searched through all fields
* Zend_Search_Lucene_Search_Query::rewriteQuery($index) translates such queries to several
*
* @var string|null
*/
private $_defaultField;
/**
* Field specified for next entry
*
* @var string
*/
private $_nextEntryField = null;
/**
* True means, that term is required.
* False means, that term is prohibited.
* null means, that term is neither prohibited, nor required
*
* @var boolean
*/
private $_nextEntrySign = null;
/**
* Entries grouping mode
*/
const GM_SIGNS = 0; // Signs mode: '+term1 term2 -term3 +(subquery1) -(subquery2)'
const GM_BOOLEAN = 1; // Boolean operators mode: 'term1 and term2 or (subquery1) and not (subquery2)'
/**
* Grouping mode
*
* @var integer
*/
private $_mode = null;
/**
* Entries signs.
* Used in GM_SIGNS grouping mode
*
* @var array
*/
private $_signs = array();
/**
* Query entries
* Each entry is a Zend_Search_Lucene_Search_QueryEntry object or
* boolean operator (Zend_Search_Lucene_Search_QueryToken class constant)
*
* @var array
*/
private $_entries = array();
/**
* Query string encoding
*
* @var string
*/
private $_encoding;
/**
* Context object constructor
*
* @param string $encoding
* @param string|null $defaultField
*/
public function __construct($encoding, $defaultField = null)
{
// Encoding is later handed to every entry's getQuery() call.
$this->_encoding = $encoding;
// Returned by getField() whenever no per-entry field has been set.
$this->_defaultField = $defaultField;
}
/**
* Get context default field
*
* @return string|null
*/
public function getField()
{
    // A field set explicitly for the upcoming entry wins over the default.
    if ($this->_nextEntryField !== null) {
        return $this->_nextEntryField;
    }

    return $this->_defaultField;
}
/**
* Set field for next entry
*
* @param string $field
*/
public function setNextEntryField($field)
{
// Consumed (reset to null) by the next addEntry() call.
$this->_nextEntryField = $field;
}
/**
* Set sign for next entry
*
* @param integer $sign
* @throws Zend_Search_Lucene_Exception
*/
public function setNextEntrySign($sign)
{
    // Signs cannot be used once boolean operators were seen in this context.
    if ($this->_mode === self::GM_BOOLEAN) {
        throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
    }

    $this->_mode = self::GM_SIGNS;

    // Map the token type onto the internal sign flag.
    switch ($sign) {
        case Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED:
            $this->_nextEntrySign = true;
            break;

        case Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED:
            $this->_nextEntrySign = false;
            break;

        default:
            throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
    }
}
/**
* Add entry to a query
*
* @param Zend_Search_Lucene_Search_QueryEntry $entry
*/
public function addEntry(Zend_Search_Lucene_Search_QueryEntry $entry)
{
    $this->_entries[] = $entry;

    // Signs are tracked alongside entries unless the context is already in
    // boolean operators mode.
    $booleanMode = ($this->_mode === self::GM_BOOLEAN);
    if (!$booleanMode) {
        $this->_signs[] = $this->_nextEntrySign;
    }

    // Per-entry settings apply to one entry only.
    $this->_nextEntrySign  = null;
    $this->_nextEntryField = null;
}
/**
* Process fuzzy search or proximity search modifier
*
* @throws Zend_Search_Lucene_Search_QueryParserException
*/
public function processFuzzyProximityModifier($parameter = null)
{
    // A pending field or sign means the modifier does not directly follow
    // a word or phrase.
    $hasPendingPrefix = ($this->_nextEntryField !== null) || ($this->_nextEntrySign !== null);
    if ($hasPendingPrefix) {
        throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
    }

    // Take the most recent element off the list to inspect it.
    $lastEntry = array_pop($this->_entries);

    if (!($lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry)) {
        // there are no entries or last entry is boolean operator
        throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
    }

    $lastEntry->processFuzzyProximityModifier($parameter);

    // Put the (now modified) entry back at the end of the list.
    array_push($this->_entries, $lastEntry);
}
/**
* Set boost factor to the entry
*
* @param float $boostFactor
*/
public function boost($boostFactor)
{
    // A pending field or sign means the modifier does not directly follow
    // a word, phrase or subquery.
    $hasPendingPrefix = ($this->_nextEntryField !== null) || ($this->_nextEntrySign !== null);
    if ($hasPendingPrefix) {
        throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
    }

    // Take the most recent element off the list to inspect it.
    $lastEntry = array_pop($this->_entries);

    if (!($lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry)) {
        // there are no entries or last entry is boolean operator
        throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
    }

    $lastEntry->boost($boostFactor);

    // Put the boosted entry back at the end of the list.
    array_push($this->_entries, $lastEntry);
}
/**
* Process logical operator
*
* @param integer $operator
*/
public function addLogicalOperator($operator)
{
    // Boolean operators cannot be used once signs were seen in this context.
    $signsMode = ($this->_mode === self::GM_SIGNS);
    if ($signsMode) {
        throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
    }

    // Operators are stored in the same list as the entries they connect.
    $this->_entries[] = $operator;
    $this->_mode      = self::GM_BOOLEAN;
}
/**
* Generate 'signs style' query from the context
* '+term1 term2 -term3 +(<subquery1>) ...'
*
* @return Zend_Search_Lucene_Search_Query
*/
public function _signStyleExpressionQuery()
{
    $query = new Zend_Search_Lucene_Search_Query_Boolean();

    // Entries without an explicit sign are required when the parser default
    // operator is AND, and optional (null) otherwise (B_OR).
    $defaultOperator = Zend_Search_Lucene_Search_QueryParser::getDefaultOperator();
    $defaultSign     = ($defaultOperator == Zend_Search_Lucene_Search_QueryParser::B_AND) ? true : null;

    foreach ($this->_entries as $entryId => $entry) {
        $sign = $this->_signs[$entryId];
        if ($sign === null) {
            $sign = $defaultSign;
        }

        $query->addSubquery($entry->getQuery($this->_encoding), $sign);
    }

    return $query;
}
/**
* Generate 'boolean style' query from the context
* 'term1 and term2 or term3 and (<subquery1>) and not (<subquery2>)'
*
* @return Zend_Search_Lucene_Search_Query
* @throws Zend_Search_Lucene_Search_QueryParserException
*/
private function _booleanExpressionQuery()
{
    /**
     * We treat each level of an expression as a boolean expression in
     * a Disjunctive Normal Form
     *
     * AND operator has higher precedence than OR
     *
     * Thus logical query is a disjunction of one or more conjunctions of
     * one or more query entries
     */
    $expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();

    try {
        // Feed entries and operators to the recognizer in their original order
        foreach ($this->_entries as $entry) {
            if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
                $expressionRecognizer->processLiteral($entry);
            } else {
                switch ($entry) {
                    case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
                        $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
                        break;

                    case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
                        $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
                        break;

                    case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
                        $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
                        break;

                    default:
                        // Zend_Search_Lucene is the index class, not an
                        // exception, so it cannot be thrown. Use the
                        // exception class required at the top of this file.
                        // NOTE(review): if it derives from
                        // Zend_Search_Exception, the catch below turns it
                        // into a QueryParserException - confirm hierarchy.
                        throw new Zend_Search_Lucene_Exception('Boolean expression error. Unknown operator type.');
                }
            }
        }

        $conjuctions = $expressionRecognizer->finishExpression();
    } catch (Zend_Search_Exception $e) {
        // It's query syntax error message and it should be user friendly.
        // So the original (FSM) message is omitted.
        throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.');
    }

    // Remove 'only negative' conjunctions.
    // Each conjunction element is used as a pair: [0] is the query entry,
    // [1] is its sign (a false-like value marks a negated entry).
    foreach ($conjuctions as $conjuctionId => $conjuction) {
        $nonNegativeEntryFound = false;

        foreach ($conjuction as $conjuctionEntry) {
            if ($conjuctionEntry[1]) {
                $nonNegativeEntryFound = true;
                break;
            }
        }

        if (!$nonNegativeEntryFound) {
            unset($conjuctions[$conjuctionId]);
        }
    }

    // Turn every remaining conjunction into a subquery
    $subqueries = array();
    foreach ($conjuctions as $conjuction) {
        // Check, if it's a one term conjunction
        if (count($conjuction) == 1) {
            $subqueries[] = $conjuction[0][0]->getQuery($this->_encoding);
        } else {
            $subquery = new Zend_Search_Lucene_Search_Query_Boolean();

            foreach ($conjuction as $conjuctionEntry) {
                $subquery->addSubquery($conjuctionEntry[0]->getQuery($this->_encoding), $conjuctionEntry[1]);
            }

            $subqueries[] = $subquery;
        }
    }

    if (count($subqueries) == 0) {
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    if (count($subqueries) == 1) {
        return $subqueries[0];
    }

    // Several conjunctions: combine them as non-required subqueries
    $query = new Zend_Search_Lucene_Search_Query_Boolean();
    foreach ($subqueries as $subquery) {
        $query->addSubquery($subquery);
    }

    return $query;
}
/**
* Generate query from current context
*
* @return Zend_Search_Lucene_Search_Query
*/
public function getQuery()
{
    // Boolean operators mode has its own builder; everything else
    // (signs mode or no mode chosen yet) is handled as a signs-style query.
    return ($this->_mode === self::GM_BOOLEAN)
         ? $this->_booleanExpressionQuery()
         : $this->_signStyleExpressionQuery();
}
}
PK �HH[n�@
�K �K Query/MultiTerm.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query */
require_once 'Zend/Search/Lucene/Search/Query.php';
/** Zend_Search_Lucene_Search_Weight_MultiTerm */
require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
{
/**
* Terms to find.
* Array of Zend_Search_Lucene_Index_Term
*
* @var array
*/
private $_terms = array();
/**
* Term signs.
* If true then term is required.
* If false then term is prohibited.
* If null then term is neither prohibited, nor required
*
* If array is null then all terms are required
*
* @var array
*/
private $_signs;
/**
* Result vector.
*
* @var array
*/
private $_resVector = null;
/**
* Terms positions vectors.
* Array of Arrays:
* term1Id => (docId => freq, ...)
* term2Id => (docId => freq, ...)
*
* @var array
*/
private $_termsFreqs = array();
/**
* A score factor based on the fraction of all query terms
* that a document contains.
* float for conjunction queries
* array of float for non conjunction queries
*
* @var mixed
*/
private $_coord = null;
/**
* Terms weights
* array of Zend_Search_Lucene_Search_Weight
*
* @var array
*/
private $_weights = array();
/**
* Class constructor. Create a new multi-term query object.
*
* if $signs array is omitted then all terms are required
* it differs from addTerm() behavior, but should never be used
*
* @param array $terms Array of Zend_Search_Lucene_Index_Term objects
* @param array $signs Array of signs. Sign is boolean|null.
*/
public function __construct($terms = null, $signs = null)
{
    // Without a terms array the object starts out empty.
    if (!is_array($terms)) {
        return;
    }

    $this->_terms = $terms;
    $this->_signs = null; // null stands for "all terms are required"

    if (!is_array($signs)) {
        return;
    }

    // Keep the signs array only if at least one term is not required.
    foreach ($signs as $sign) {
        if ($sign !== true) {
            $this->_signs = $signs;
            return;
        }
    }
}
/**
* Add a $term (Zend_Search_Lucene_Index_Term) to this query.
*
* The sign is specified as:
* TRUE - term is required
* FALSE - term is prohibited
* NULL - term is neither prohibited, nor required
*
* @param Zend_Search_Lucene_Index_Term $term
* @param boolean|null $sign
* @return void
*/
public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null) {
    $allRequiredSoFar = ($this->_signs === null);

    // While every term (including the new one) is required, the signs
    // array stays null and nothing needs to be recorded.
    if (!($sign === true && $allRequiredSoFar)) {
        if ($allRequiredSoFar) {
            // First non-required term: materialize a 'true' sign for each
            // previously added term.
            $this->_signs = array_pad(array(), count($this->_terms), true);
        }

        $this->_signs[] = $sign;
    }

    $this->_terms[] = $term;
}
/**
* Re-write query into primitive queries in the context of specified index
*
* @param Zend_Search_Lucene_Interface $index
* @return Zend_Search_Lucene_Search_Query
*/
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Look for a term that has no field assigned
    $hasUnqualifiedTerm = false;
    foreach ($this->_terms as $term) {
        if ($term->field === null) {
            $hasUnqualifiedTerm = true;
            break;
        }
    }

    // All fields are qualified: the query is usable as it is
    if (!$hasUnqualifiedTerm) {
        return $this;
    }

    /** transform multiterm query to boolean and apply rewrite() method to subqueries. */
    $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
    $booleanQuery->setBoost($this->getBoost());

    foreach ($this->_terms as $termId => $term) {
        $termQuery = new Zend_Search_Lucene_Search_Query_Term($term);
        $rewritten = $termQuery->rewrite($index);

        // A null signs array means every term is required
        if ($this->_signs === null) {
            $sign = true;
        } else {
            $sign = $this->_signs[$termId];
        }

        $booleanQuery->addSubquery($rewritten, $sign);
    }

    return $booleanQuery;
}
/**
* Optimize query in the context of specified index
*
* @param Zend_Search_Lucene_Interface $index
* @return Zend_Search_Lucene_Search_Query
*/
public function optimize(Zend_Search_Lucene_Interface $index)
{
    $keptTerms = $this->_terms;
    $keptSigns = $this->_signs;

    // Drop terms the index does not contain; a missing required term
    // makes the whole query unsatisfiable.
    foreach ($keptTerms as $termId => $term) {
        if ($index->hasTerm($term)) {
            continue;
        }

        if ($keptSigns === null || $keptSigns[$termId] === true) {
            // Term is required
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        // Term is optional or prohibited: remove it together with its sign
        unset($keptTerms[$termId]);
        unset($keptSigns[$termId]);
    }

    // A query consisting only of prohibited terms matches nothing.
    if ($keptSigns !== null) {
        $hasNonProhibited = false;
        foreach ($keptSigns as $flag) {
            if ($flag !== false) {
                $hasNonProhibited = true;
                break;
            }
        }

        if (!$hasNonProhibited) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }
    }

    /**
     * @todo make an optimization for repeated terms
     *       (they may have different signs)
     */

    $remaining = count($keptTerms);

    if ($remaining == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    if ($remaining == 1) {
        // The single remaining term cannot be prohibited (checked above),
        // so a plain term query is enough.
        $singleTermQuery = new Zend_Search_Lucene_Search_Query_Term(reset($keptTerms));
        $singleTermQuery->setBoost($this->getBoost());
        return $singleTermQuery;
    }

    $multiTermQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($keptTerms, $keptSigns);
    $multiTermQuery->setBoost($this->getBoost());
    return $multiTermQuery;
}
/**
* Returns query terms
*
* @return array
*/
public function getTerms()
{
// Returns the internal term list as stored (prohibited terms included).
return $this->_terms;
}
/**
* Return terms signs
*
* @return array
*/
public function getSigns()
{
// May be NULL: the class uses a NULL sign list to mean "all terms are required".
return $this->_signs;
}
/**
* Set weight for specified term
*
* @param integer $num
* @param Zend_Search_Lucene_Search_Weight_Term $weight
*/
public function setWeight($num, $weight)
{
// $num is used as the term id: the score calculators read $this->_weights[$termId].
$this->_weights[$num] = $weight;
}
/**
* Constructs an appropriate Weight implementation for this query.
*
* @param Zend_Search_Lucene_Interface $reader
* @return Zend_Search_Lucene_Search_Weight
*/
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
    // Build the weight object, remember it on the query and hand it back.
    $weight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
    $this->_weight = $weight;

    return $weight;
}
/**
* Calculate result vector for Conjunction query
* (like '+something +another')
*
* @param Zend_Search_Lucene_Interface $reader
*/
private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
{
    // A query without terms matches nothing. Return here: the code below
    // flips the docs vector of the last processed term, which does not
    // exist when there are no terms at all.
    if (count($this->_terms) == 0) {
        $this->_resVector = array();
        return;
    }

    $this->_resVector = null;

    // Order terms by selectivity
    $docFreqs = array();
    $ids      = array();
    foreach ($this->_terms as $id => $term) {
        $docFreqs[] = $reader->docFreq($term);
        $ids[]      = $id; // Used to keep original order for terms with the same selectivity and omit terms comparison
    }
    // NOTE: this reorders (and renumbers) $this->_terms itself.
    array_multisort($docFreqs, SORT_ASC, SORT_NUMERIC,
                    $ids,      SORT_ASC, SORT_NUMERIC,
                    $this->_terms);

    // The same filter object is passed to every termDocs() call.
    $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
    foreach ($this->_terms as $termId => $term) {
        $termDocs = $reader->termDocs($term, $docsFilter);
    }

    // Treat last retrieved docs vector as a result set
    // (filter collects data for other terms)
    $this->_resVector = array_flip($termDocs);

    foreach ($this->_terms as $termId => $term) {
        $this->_termsFreqs[$termId] = $reader->termFreqs($term, $docsFilter);
    }

    // ksort($this->_resVector, SORT_NUMERIC);
    // Docs are returned ordered. Used algorithms don't change elements order.
}
/**
* Calculate result vector for non Conjunction query
* (like '+something -another')
*
* @param Zend_Search_Lucene_Interface $reader
*/
private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
{
    $mustVectors = array();
    $mustSizes   = array();
    $mustIds     = array(); // is used to prevent arrays comparison

    $mayMatch     = array();
    $mustNotMatch = array();

    // Collect the docs of every term into the bucket selected by its sign.
    foreach ($this->_terms as $termId => $term) {
        $docs = array_flip($reader->termDocs($term));
        $sign = $this->_signs[$termId];

        if ($sign === true) {
            // required
            $mustVectors[] = $docs;
            $mustSizes[]   = count($docs);
            $mustIds[]     = $termId;
        } elseif ($sign === false) {
            // prohibited (array union)
            $mustNotMatch += $docs;
        } else {
            // neither required, nor prohibited (array union)
            $mayMatch += $docs;
        }

        $this->_termsFreqs[$termId] = $reader->termFreqs($term);
    }

    // Process the required vectors smallest first.
    array_multisort($mustSizes,   SORT_ASC, SORT_NUMERIC,
                    $mustIds,     SORT_ASC, SORT_NUMERIC,
                    $mustVectors);

    // Intersect the required vectors by key.
    $required = null;
    foreach ($mustVectors as $vector) {
        if ($required === null) {
            $required = $vector;
        } else {
            // Manual key intersection (workaround for array_intersect_key() slowness).
            $narrowed = array();
            foreach ($required as $docId => $value) {
                if (isset($vector[$docId])) {
                    $narrowed[$docId] = $value;
                }
            }
            $required = $narrowed;
        }

        if (count($required) == 0) {
            // Empty result set, we don't need to check other terms
            break;
        }
    }

    // Without required terms the optional matches form the result.
    $this->_resVector = ($required !== null) ? $required : $mayMatch;

    if (count($mustNotMatch) != 0) {
        // Manual key difference (workaround for array_diff_key() slowness):
        // iterate over whichever of the two arrays is smaller.
        $filtered = $this->_resVector;

        if (count($this->_resVector) < count($mustNotMatch)) {
            foreach ($this->_resVector as $docId => $value) {
                if (isset($mustNotMatch[$docId])) {
                    unset($filtered[$docId]);
                }
            }
        } else {
            foreach ($mustNotMatch as $docId => $value) {
                unset($filtered[$docId]);
            }
        }

        $this->_resVector = $filtered;
    }

    ksort($this->_resVector, SORT_NUMERIC);
}
/**
* Score calculator for conjunction queries (all terms are required)
*
* @param integer $docId
* @param Zend_Search_Lucene_Interface $reader
* @return float
*/
public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    // All terms are required, so the coordination factor is computed once
    // with "matched terms == total terms".
    if ($this->_coord === null) {
        $termCount    = count($this->_terms);
        $this->_coord = $reader->getSimilarity()->coord($termCount, $termCount);
    }

    $total = 0.0;
    foreach ($this->_terms as $termId => $term) {
        // Term freq is not checked for being present:
        // scoring is performed only for matched docs.
        $tf          = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
        $weightValue = $this->_weights[$termId]->getValue();
        $fieldNorm   = $reader->norm($docId, $term->field);

        $total += $tf * $weightValue * $fieldNorm;
    }

    return $total * $this->_coord * $this->getBoost();
}
/**
* Score calculator for non conjunction queries (not all terms are required)
*
* @param integer $docId
* @param Zend_Search_Lucene_Interface $reader
* @return float
*/
public function _nonConjunctionScore($docId, $reader)
{
    // Lazily build the table of coordination factors: one entry for every
    // possible number of matched non-prohibited terms.
    if ($this->_coord === null) {
        $this->_coord = array();

        $maxCoord = 0;
        foreach ($this->_signs as $sign) {
            if ($sign === false) {
                // prohibited terms do not take part in coordination
                continue;
            }
            $maxCoord++;
        }

        for ($count = 0; $count <= $maxCoord; $count++) {
            $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
        }
    }

    $total        = 0.0;
    $matchedTerms = 0;

    foreach ($this->_terms as $termId => $term) {
        if ($this->_signs[$termId] === false) {
            // prohibited
            continue;
        }
        if (!isset($this->_termsFreqs[$termId][$docId])) {
            // not matched in this document
            continue;
        }

        $matchedTerms++;

        $tf          = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
        $weightValue = $this->_weights[$termId]->getValue();
        $fieldNorm   = $reader->norm($docId, $term->field);

        $total += $tf * $weightValue * $fieldNorm;
    }

    return $total * $this->_coord[$matchedTerms] * $this->getBoost();
}
/**
* Execute query in context of index reader
* It also initializes necessary internal structures
*
* @param Zend_Search_Lucene_Interface $reader
* @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
*/
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    // An explicit sign list means that not all terms are required.
    if ($this->_signs !== null) {
        $this->_calculateNonConjunctionResult($reader);
    } else {
        $this->_calculateConjunctionResult($reader);
    }

    // Initialize weight if it's not done yet
    $this->_initWeight($reader);
}
/**
* Get document ids likely matching the query
*
* It's an array with document ids as keys (performance considerations)
*
* @return array
*/
public function matchedDocs()
{
// The result vector is built by execute(); document ids are its keys.
return $this->_resVector;
}
/**
* Score specified document
*
* @param integer $docId
* @param Zend_Search_Lucene_Interface $reader
* @return float
*/
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    // Documents outside of the result vector get no score.
    if (!isset($this->_resVector[$docId])) {
        return 0;
    }

    // A NULL sign list marks a conjunction query (all terms are required).
    if ($this->_signs !== null) {
        return $this->_nonConjunctionScore($docId, $reader);
    }

    return $this->_conjunctionScore($docId, $reader);
}
/**
* Return query terms
*
* @return array
*/
public function getQueryTerms()
{
    // Without a sign list every term is required, so all of them are query terms.
    if ($this->_signs === null) {
        return $this->_terms;
    }

    // Otherwise collect everything except the prohibited terms.
    $nonProhibited = array();
    foreach ($this->_signs as $id => $sign) {
        if ($sign === false) {
            continue;
        }
        $nonProhibited[] = $this->_terms[$id];
    }

    return $nonProhibited;
}
/**
* Highlight query terms
*
* @param Zend_Search_Lucene_Document_Html $doc
* @param integer &$colorIndex
*/
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
{
    $words = array();

    if ($this->_signs !== null) {
        // Highlight everything except the prohibited terms.
        foreach ($this->_signs as $id => $sign) {
            if ($sign === false) {
                continue;
            }
            $words[] = $this->_terms[$id]->text;
        }
    } else {
        // All terms are required.
        foreach ($this->_terms as $term) {
            $words[] = $term->text;
        }
    }

    $color = $this->_getHighlightColor($colorIndex);
    $doc->highlight($words, $color);
}
/**
* Print a query
*
* @return string
*/
public function __toString()
{
    // It's used only for query visualisation, so we don't care about characters escaping
    $parts = array();

    foreach ($this->_terms as $id => $term) {
        $part = '';

        // Sign prefix: '+' required (a NULL sign list means all terms are
        // required), '-' prohibited, nothing for optional terms.
        if ($this->_signs === null || $this->_signs[$id] === true) {
            $part .= '+';
        } else if ($this->_signs[$id] === false) {
            $part .= '-';
        }

        if ($term->field !== null) {
            $part .= $term->field . ':';
        }

        $parts[] = $part . $term->text;
    }

    // Join with single spaces. The separator must not depend on the term key
    // being 0: term keys are not guaranteed to start at zero.
    $query = implode(' ', $parts);

    if ($this->getBoost() != 1) {
        $query = '(' . $query . ')^' . $this->getBoost();
    }

    return $query;
}
}
PK �HH[�ͯ9�C �C Query/Phrase.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Zend_Search_Lucene_Search_Query
*/
require_once 'Zend/Search/Lucene/Search/Query.php';
/**
* Zend_Search_Lucene_Search_Weight_Phrase
*/
require_once 'Zend/Search/Lucene/Search/Weight/Phrase.php';
/**
* A Query that matches documents containing a particular sequence of terms.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Query
{
/**
* Terms to find.
* Array of Zend_Search_Lucene_Index_Term objects.
*
* @var array
*/
private $_terms;
/**
* Term positions (relative positions of terms within the phrase).
* Array of integers
*
* @var array
*/
private $_offsets;
/**
* Sets the number of other words permitted between words in query phrase.
* If zero, then this is an exact phrase search. For larger values this works
* like a WITHIN or NEAR operator.
*
* The slop is in fact an edit-distance, where the units correspond to
* moves of terms in the query phrase out of position. For example, to switch
* the order of two words requires two moves (the first move places the words
* atop one another), so to permit re-orderings of phrases, the slop must be
* at least two.
* More exact matches are scored higher than sloppier matches, thus search
* results are sorted by exactness.
*
* The slop is zero by default, requiring exact matches.
*
* @var integer
*/
private $_slop;
/**
* Result vector.
*
* @var array
*/
private $_resVector = null;
/**
* Terms positions vectors.
* Array of Arrays:
* term1Id => (docId => array( pos1, pos2, ... ), ...)
* term2Id => (docId => array( pos1, pos2, ... ), ...)
*
* @var array
*/
private $_termsPositions = array();
/**
* Class constructor. Create a new phrase query.
*
* @param array $terms Terms to search. Array of strings.
* @param array $offsets Relative term positions. Array of integers.
* @param string $field Field to search.
* @throws Zend_Search_Lucene_Exception
*/
public function __construct($terms = null, $offsets = null, $field = null)
{
    // Exact phrase matching by default.
    $this->_slop = 0;

    if (is_array($terms)) {
        // Wrap every term text into a term object, keeping the caller's keys.
        $this->_terms = array();
        foreach ($terms as $termId => $termText) {
            $this->_terms[$termId] = ($field !== null)? new Zend_Search_Lucene_Index_Term($termText, $field):
                                                        new Zend_Search_Lucene_Index_Term($termText);
        }
    } else if ($terms === null) {
        $this->_terms = array();
    } else {
        throw new Zend_Search_Lucene_Exception('terms argument must be array of strings or null');
    }

    if (is_array($offsets)) {
        if (count($this->_terms) != count($offsets)) {
            throw new Zend_Search_Lucene_Exception('terms and offsets arguments must have the same size.');
        }
        $this->_offsets = $offsets;
    } else if ($offsets === null) {
        // No explicit offsets: number the terms 0, 1, 2, ... in iteration
        // order, stored under the same keys as the terms.
        $this->_offsets = array();
        foreach ($this->_terms as $termId => $term) {
            $this->_offsets[$termId] = count($this->_offsets);
        }
    } else {
        // Offsets are relative term positions, i.e. integers.
        throw new Zend_Search_Lucene_Exception('offsets argument must be array of integers or null');
    }
}
/**
* Set slop
*
* @param integer $slop
*/
public function setSlop($slop)
{
// score() uses the exact-phrase calculator when the slop equals 0 and the sloppy one otherwise.
$this->_slop = $slop;
}
/**
* Get slop
*
* @return integer
*/
public function getSlop()
{
// The constructor initialises the slop to 0.
return $this->_slop;
}
/**
* Adds a term to the end of the query phrase.
* The relative position of the term is specified explicitly or the one immediately
* after the last term added.
*
* @param Zend_Search_Lucene_Index_Term $term
* @param integer $position
*/
public function addTerm(Zend_Search_Lucene_Index_Term $term, $position = null) {
    // Every term of a phrase has to belong to the same field as the last added one.
    if (count($this->_terms) != 0) {
        $lastTerm = end($this->_terms);
        if ($lastTerm->field != $term->field) {
            throw new Zend_Search_Lucene_Exception('All phrase terms must be in the same field: ' .
                                                   $term->field . ':' . $term->text);
        }
    }

    $this->_terms[] = $term;

    if ($position === null) {
        // Default position: right after the last offset, or 0 for the first term.
        if (count($this->_offsets) != 0) {
            $position = end($this->_offsets) + 1;
        } else {
            $position = 0;
        }
    }

    $this->_offsets[] = $position;
}
/**
* Re-write query into primitive queries in the context of specified index
*
* @param Zend_Search_Lucene_Interface $index
* @return Zend_Search_Lucene_Search_Query
*/
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if (count($this->_terms) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    // Inspect the first term whatever its key is: the constructor keeps the
    // caller's array keys, so index 0 is not guaranteed to exist.
    $firstTerm = reset($this->_terms);

    if ($firstTerm->field !== null) {
        // The phrase is bound to a field already.
        return $this;
    }

    // Unqualified phrase: search it in every field returned by
    // getFieldNames(true) and combine the per-field phrase queries.
    $query = new Zend_Search_Lucene_Search_Query_Boolean();
    $query->setBoost($this->getBoost());

    foreach ($index->getFieldNames(true) as $fieldName) {
        $subquery = new Zend_Search_Lucene_Search_Query_Phrase();
        $subquery->setSlop($this->getSlop());

        foreach ($this->_terms as $termId => $term) {
            $qualifiedTerm = new Zend_Search_Lucene_Index_Term($term->text, $fieldName);

            $subquery->addTerm($qualifiedTerm, $this->_offsets[$termId]);
        }

        $query->addSubquery($subquery);
    }

    return $query;
}
/**
* Optimize query in the context of specified index
*
* @param Zend_Search_Lucene_Interface $index
* @return Zend_Search_Lucene_Search_Query
*/
public function optimize(Zend_Search_Lucene_Interface $index)
{
    // A phrase can only match if the index contains every one of its terms.
    foreach ($this->_terms as $phraseTerm) {
        if (!$index->hasTerm($phraseTerm)) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }
    }

    $termCount = count($this->_terms);

    if ($termCount == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    if ($termCount == 1) {
        // A one-term phrase is replaced by a plain term query with the same boost.
        $termQuery = new Zend_Search_Lucene_Search_Query_Term(reset($this->_terms));
        $termQuery->setBoost($this->getBoost());
        return $termQuery;
    }

    return $this;
}
/**
* Returns query terms
*
* @return array
*/
public function getTerms()
{
// Array of Zend_Search_Lucene_Index_Term objects, keyed as they were supplied or added.
return $this->_terms;
}
/**
* Set weight for specified term
*
* @param integer $num
* @param Zend_Search_Lucene_Search_Weight_Term $weight
*/
public function setWeight($num, $weight)
{
// NOTE(review): this class does not declare a $_weights property itself;
// presumably it is inherited or created dynamically here - confirm against the parent class.
$this->_weights[$num] = $weight;
}
/**
* Constructs an appropriate Weight implementation for this query.
*
* @param Zend_Search_Lucene_Interface $reader
* @return Zend_Search_Lucene_Search_Weight
*/
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
    // Build the phrase weight, remember it on the query and hand it back.
    $weight = new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader);
    $this->_weight = $weight;

    return $weight;
}
/**
* Score calculator for exact phrase queries (terms sequence is fixed)
*
* @param integer $docId
* @return float
*/
public function _exactPhraseFreq($docId)
{
    // Pick the term having the fewest positions in this document as the anchor.
    $anchorId = null;
    foreach ($this->_terms as $termId => $term) {
        if ($anchorId === null ||
            count($this->_termsPositions[$termId][$docId]) <
            count($this->_termsPositions[$anchorId][$docId]) ) {
            $anchorId = $termId;
        }
    }

    $occurrences = 0;

    // Every anchor position is a candidate phrase occurrence.
    foreach ($this->_termsPositions[$anchorId][$docId] as $anchorPos) {
        $phraseFound = true;

        // All other terms have to sit at their offset relative to the anchor.
        foreach ($this->_terms as $termId => $term) {
            if ($termId == $anchorId) {
                continue;
            }

            $expectedPosition = $anchorPos +
                                ($this->_offsets[$termId] -
                                 $this->_offsets[$anchorId]);

            if (!in_array($expectedPosition, $this->_termsPositions[$termId][$docId])) {
                $phraseFound = false;
                break;
            }
        }

        if ($phraseFound) {
            $occurrences++;
        }
    }

    return $occurrences;
}
/**
* Score calculator for sloppy phrase queries (terms sequence is fixed)
*
* @param integer $docId
* @param Zend_Search_Lucene_Interface $reader
* @return float
*/
public function _sloppyPhraseFreq($docId, Zend_Search_Lucene_Interface $reader)
{
$freq = 0;
// Queue of candidate phrases; each candidate maps termId => position in the document.
$phraseQueue = array();
$phraseQueue[0] = array(); // empty phrase
$lastTerm = null;
// Walk through the terms to create phrases.
foreach ($this->_terms as $termId => $term) {
// Only the candidates that existed before this term are extended/cloned below.
$queueSize = count($phraseQueue);
$firstPass = true;
// Walk through the term positions.
// Each term position produces a set of phrases.
foreach ($this->_termsPositions[$termId][$docId] as $termPosition ) {
if ($firstPass) {
// First position of the term: write it into every existing candidate in place.
for ($count = 0; $count < $queueSize; $count++) {
$phraseQueue[$count][$termId] = $termPosition;
}
} else {
// Further positions: clone existing candidates with this position instead,
// skipping clones whose gap to the previous term already exceeds the slop.
for ($count = 0; $count < $queueSize; $count++) {
if ($lastTerm !== null &&
abs( $termPosition - $phraseQueue[$count][$lastTerm] -
($this->_offsets[$termId] - $this->_offsets[$lastTerm])) > $this->_slop) {
continue;
}
$newPhraseId = count($phraseQueue);
$phraseQueue[$newPhraseId] = $phraseQueue[$count];
$phraseQueue[$newPhraseId][$termId] = $termPosition;
}
}
$firstPass = false;
}
$lastTerm = $termId;
}
// Evaluate every candidate: find the smallest total displacement over all
// start shifts within [-slop, +slop].
foreach ($phraseQueue as $phrasePos) {
$minDistance = null;
for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) {
$distance = 0;
$start = reset($phrasePos) - reset($this->_offsets) + $shift;
foreach ($this->_terms as $termId => $term) {
$distance += abs($phrasePos[$termId] - $this->_offsets[$termId] - $start);
if($distance > $this->_slop) {
// Already beyond the slop for this shift; no need to add the remaining terms.
break;
}
}
if ($minDistance === null || $distance < $minDistance) {
$minDistance = $distance;
}
}
// Candidates within the slop contribute sloppyFreq(minDistance) to the frequency.
if ($minDistance <= $this->_slop) {
$freq += $reader->getSimilarity()->sloppyFreq($minDistance);
}
}
return $freq;
}
/**
* Execute query in context of index reader
* It also initializes necessary internal structures
*
* @param Zend_Search_Lucene_Interface $reader
* @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
*/
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    // An empty phrase matches nothing; otherwise the result is built below.
    $this->_resVector = (count($this->_terms) == 0) ? array() : null;

    $docSets     = array();
    $docSetSizes = array();
    $docSetIds   = array(); // is used to prevent arrays comparison

    // Load the docs (as array keys) and the positions of every term.
    foreach ($this->_terms as $termId => $term) {
        $docSet = array_flip($reader->termDocs($term));

        $docSets[]     = $docSet;
        $docSetSizes[] = count($docSet);
        $docSetIds[]   = $termId;

        $this->_termsPositions[$termId] = $reader->termPositions($term);
    }

    // Process the doc sets smallest first.
    array_multisort($docSetSizes, SORT_ASC, SORT_NUMERIC,
                    $docSetIds,   SORT_ASC, SORT_NUMERIC,
                    $docSets);

    // Intersect the doc sets by key.
    foreach ($docSets as $docSet) {
        if ($this->_resVector === null) {
            $this->_resVector = $docSet;
        } else {
            // Manual key intersection (workaround for array_intersect_key() slowness).
            $common = array();
            foreach ($this->_resVector as $id => $value) {
                if (isset($docSet[$id])) {
                    $common[$id] = $value;
                }
            }
            $this->_resVector = $common;
        }

        if (count($this->_resVector) == 0) {
            // Empty result set, we don't need to check other terms
            break;
        }
    }

    // ksort($this->_resVector, SORT_NUMERIC);
    // Docs are returned ordered. Used algorithm doesn't change elements order.

    // Initialize weight if it's not done yet
    $this->_initWeight($reader);
}
/**
* Get document ids likely matching the query
*
* It's an array with document ids as keys (performance considerations)
*
* @return array
*/
public function matchedDocs()
{
// Built by execute(): documents containing all phrase terms. The phrase itself is
// verified only in score(), which is why these are "likely" matches.
return $this->_resVector;
}
/**
* Score specified document
*
* @param integer $docId
* @param Zend_Search_Lucene_Interface $reader
* @return float
*/
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    // Documents outside of the result vector get no score.
    if (!isset($this->_resVector[$docId])) {
        return 0;
    }

    // Zero slop means an exact phrase; anything else is a sloppy phrase.
    $freq = ($this->_slop == 0)
          ? $this->_exactPhraseFreq($docId)
          : $this->_sloppyPhraseFreq($docId, $reader);

    if ($freq == 0) {
        // Included in result, but calculated freq is zero
        return 0;
    }

    $tf     = $reader->getSimilarity()->tf($freq);
    $weight = $this->_weight->getValue();
    $norm   = $reader->norm($docId, reset($this->_terms)->field);

    return $tf * $weight * $norm * $this->getBoost();
}
/**
* Return query terms
*
* @return array
*/
public function getQueryTerms()
{
// Every phrase term is a query term; the list is returned as stored.
return $this->_terms;
}
/**
* Highlight query terms
*
* @param Zend_Search_Lucene_Document_Html $doc
* @param integer &$colorIndex
*/
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
{
    // Collect the text of every phrase term ...
    $phraseWords = array();
    foreach ($this->_terms as $phraseTerm) {
        $phraseWords[] = $phraseTerm->text;
    }

    // ... and highlight all of them with one color.
    $color = $this->_getHighlightColor($colorIndex);
    $doc->highlight($phraseWords, $color);
}
/**
* Print a query
*
* @return string
*/
public function __toString()
{
    // It's used only for query visualisation, so we don't care about characters escaping
    $query = '';

    // Take the field from the first term whatever its key is: term keys are
    // not guaranteed to start at 0. reset() returns FALSE for an empty list.
    $firstTerm = reset($this->_terms);
    if ($firstTerm !== false && $firstTerm->field !== null) {
        $query .= $firstTerm->field . ':';
    }

    // Join the term texts with single spaces, independently of the term keys.
    $words = array();
    foreach ($this->_terms as $term) {
        $words[] = $term->text;
    }
    $query .= '"' . implode(' ', $words) . '"';

    if ($this->_slop != 0) {
        $query .= '~' . $this->_slop;
    }

    return $query;
}
}
PK �HH[���DN8 N8 Query/Fuzzy.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query */
require_once 'Zend/Search/Lucene/Search/Query.php';
/** Zend_Search_Lucene_Search_Query_MultiTerm */
require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Query
{
/** Default minimum similarity */
const DEFAULT_MIN_SIMILARITY = 0.5;
/**
* Maximum number of matched terms.
* Apache Lucene defines this limitation as boolean query maximum number of clauses:
* org.apache.lucene.search.BooleanQuery.getMaxClauseCount()
*/
const MAX_CLAUSE_COUNT = 1024;
/**
* Array of precalculated max distances
*
* keys are integers representing a word size
*/
private $_maxDistances = array();
/**
* Base searching term.
*
* @var Zend_Search_Lucene_Index_Term
*/
private $_term;
/**
* A value between 0 and 1 to set the required similarity
* between the query term and the matching terms. For example, for a
* _minimumSimilarity of 0.5 a term of the same length
* as the query term is considered similar to the query term if the edit distance
* between both terms is less than length(term)*0.5
*
* @var float
*/
private $_minimumSimilarity;
/**
* The length of common (non-fuzzy) prefix
*
* @var integer
*/
private $_prefixLength;
/**
* Matched terms.
*
* Matched terms list.
* It's filled during the search (rewrite operation) and may be used for search result
* post-processing
*
* Array of Zend_Search_Lucene_Index_Term objects
*
* @var array
*/
private $_matches = null;
/**
* Matched terms scores
*
* @var array
*/
private $_scores = null;
/**
* Array of the term keys.
* Used to sort terms in alphabetical order if terms have the same scores
*
* @var array
*/
private $_termKeys = null;
/**
* Zend_Search_Lucene_Search_Query_Fuzzy constructor.
*
* @param Zend_Search_Lucene_Index_Term $term
* @param float $minimumSimilarity
* @param integer $prefixLength
* @throws Zend_Search_Lucene_Exception
*/
public function __construct(Zend_Search_Lucene_Index_Term $term, $minimumSimilarity = self::DEFAULT_MIN_SIMILARITY, $prefixLength = 0)
{
    // Validate the arguments; the first failing check wins.
    if ($minimumSimilarity < 0) {
        throw new Zend_Search_Lucene_Exception('minimumSimilarity cannot be less than 0');
    } elseif ($minimumSimilarity >= 1) {
        throw new Zend_Search_Lucene_Exception('minimumSimilarity cannot be greater than or equal to 1');
    } elseif ($prefixLength < 0) {
        throw new Zend_Search_Lucene_Exception('prefixLength cannot be less than 0');
    }

    $this->_term              = $term;
    $this->_minimumSimilarity = $minimumSimilarity;
    $this->_prefixLength      = $prefixLength;
}
/**
* Calculate maximum distance for specified word length
*
* @param integer $prefixLength
* @param integer $termLength
* @param integer $length
* @return integer
*/
private function _calculateMaxDistance($prefixLength, $termLength, $length)
{
    // The shorter of the two compared lengths plus the common prefix is
    // scaled by (1 - minimum similarity) and truncated to an integer.
    $comparedLength = min($termLength, $length) + $prefixLength;
    $maxDistance    = (int) ((1 - $this->_minimumSimilarity)*$comparedLength);

    // Remember the value; it is cached by $length only.
    $this->_maxDistances[$length] = $maxDistance;

    return $maxDistance;
}
/**
* Re-write query into primitive queries in the context of specified index
*
* @param Zend_Search_Lucene_Interface $index
* @return Zend_Search_Lucene_Search_Query
*/
public function rewrite(Zend_Search_Lucene_Interface $index)
{
// Start from a clean state: the match lists are rebuilt on every rewrite.
$this->_matches = array();
$this->_scores = array();
$this->_termKeys = array();
if ($this->_term->field === null) {
// Search through all fields
$fields = $index->getFieldNames(true /* indexed fields list */);
} else {
$fields = array($this->_term->field);
}
// Split the search term into the non-fuzzy prefix and the rest that is compared fuzzily.
$prefix = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
$prefixByteLength = strlen($prefix);
$prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
$termLength = Zend_Search_Lucene_Index_Term::getLength($this->_term->text);
$termRest = substr($this->_term->text, $prefixByteLength);
// we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
$termRestLength = strlen($termRest);
// Used to scale (similarity - minimumSimilarity) by 1/(1 - minimumSimilarity) for the boost.
$scaleFactor = 1/(1 - $this->_minimumSimilarity);
foreach ($fields as $field) {
$index->resetTermsStream();
if ($prefix != '') {
// Walk only through the terms of this field that start with the prefix.
$index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));
while ($index->currentTerm() !== null &&
$index->currentTerm()->field == $field &&
substr($index->currentTerm()->text, 0, $prefixByteLength) == $prefix) {
// Calculate similarity
$target = substr($index->currentTerm()->text, $prefixByteLength);
// Max distances are cached by target byte length.
$maxDistance = isset($this->_maxDistances[strlen($target)])?
$this->_maxDistances[strlen($target)] :
$this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, strlen($target));
if ($termRestLength == 0) {
// we don't have anything to compare. That means if we just add
// the letters for current term we get the new word
$similarity = (($prefixUtf8Length == 0)? 0 : 1 - strlen($target)/$prefixUtf8Length);
} else if (strlen($target) == 0) {
// The index term is exactly the prefix: the whole rest of the search term is the difference.
$similarity = (($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length);
} else if ($maxDistance < abs($termRestLength - strlen($target))){
//just adding the characters of term to target or vice-versa results in too many edits
//for example "pre" length is 3 and "prefixes" length is 8. We can see that
//given this optimal circumstance, the edit distance cannot be less than 5.
//which is 8-3 or more precisely abs(3-8).
//if our maximum edit distance is 4, then we can discard this word
//without looking at it.
$similarity = 0;
} else {
$similarity = 1 - levenshtein($termRest, $target)/($prefixUtf8Length + min($termRestLength, strlen($target)));
}
if ($similarity > $this->_minimumSimilarity) {
$this->_matches[] = $index->currentTerm();
$this->_termKeys[] = $index->currentTerm()->key();
$this->_scores[] = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
}
$index->nextTerm();
}
} else {
// No prefix: every term of the field has to be compared.
$index->skipTo(new Zend_Search_Lucene_Index_Term('', $field));
while ($index->currentTerm() !== null && $index->currentTerm()->field == $field) {
// Calculate similarity
$target = $index->currentTerm()->text;
$maxDistance = isset($this->_maxDistances[strlen($target)])?
$this->_maxDistances[strlen($target)] :
$this->_calculateMaxDistance(0, $termRestLength, strlen($target));
if ($maxDistance < abs($termRestLength - strlen($target))){
//just adding the characters of term to target or vice-versa results in too many edits
//for example "pre" length is 3 and "prefixes" length is 8. We can see that
//given this optimal circumstance, the edit distance cannot be less than 5.
//which is 8-3 or more precisely abs(3-8).
//if our maximum edit distance is 4, then we can discard this word
//without looking at it.
$similarity = 0;
} else {
// NOTE(review): the divisor is 0 when both the search term and the index term are
// empty strings - presumably that never happens in practice; confirm.
$similarity = 1 - levenshtein($termRest, $target)/min($termRestLength, strlen($target));
}
if ($similarity > $this->_minimumSimilarity) {
$this->_matches[] = $index->currentTerm();
$this->_termKeys[] = $index->currentTerm()->key();
$this->_scores[] = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
}
$index->nextTerm();
}
}
$index->closeTermsStream();
}
if (count($this->_matches) == 0) {
return new Zend_Search_Lucene_Search_Query_Empty();
} else if (count($this->_matches) == 1) {
return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
} else {
$rewrittenQuery = new Zend_Search_Lucene_Search_Query_Boolean();
// Best scores first; term keys break ties alphabetically. $this->_matches is reordered accordingly.
array_multisort($this->_scores, SORT_DESC, SORT_NUMERIC,
$this->_termKeys, SORT_ASC, SORT_STRING,
$this->_matches);
$termCount = 0;
foreach ($this->_matches as $id => $matchedTerm) {
// Each matched term becomes a term subquery boosted by its scaled similarity.
$subquery = new Zend_Search_Lucene_Search_Query_Term($matchedTerm);
$subquery->setBoost($this->_scores[$id]);
$rewrittenQuery->addSubquery($subquery);
$termCount++;
// Only the first MAX_CLAUSE_COUNT matches are added to the boolean query.
if ($termCount >= self::MAX_CLAUSE_COUNT) {
break;
}
}
return $rewrittenQuery;
}
}
/**
 * Optimize query in the context of specified index
 *
 * This query type cannot be optimized or searched directly; it has to be
 * converted into primitive queries with rewrite() first, so this method
 * always throws.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Exception
 */
public function optimize(Zend_Search_Lucene_Interface $index)
{
    // The message previously said "Wildcard query", which misidentified this (fuzzy) query type.
    throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
 * Return the terms matched by this query.
 *
 * The matched terms list is filled while the query is rewritten; until then
 * it is null and asking for the terms is reported as an error.
 *
 * @return array
 * @throws Zend_Search_Lucene_Exception
 */
public function getQueryTerms()
{
    if ($this->_matches !== null) {
        return $this->_matches;
    }

    throw new Zend_Search_Lucene_Exception('Search has to be performed first to get matched terms');
}
/**
 * Constructs an appropriate Weight implementation for this query.
 *
 * Not supported for this query type: it must be converted with rewrite()
 * before searching, so this method always throws.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @return Zend_Search_Lucene_Search_Weight
 * @throws Zend_Search_Lucene_Exception
 */
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
    // The message previously said "Wildcard query", which misidentified this (fuzzy) query type.
    throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
 * Execute query in context of index reader
 *
 * Not supported for this query type: it must be converted with rewrite()
 * before searching, so this method always throws.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @throws Zend_Search_Lucene_Exception
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    // The message previously said "Wildcard query", which misidentified this (fuzzy) query type.
    throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
 * Get document ids likely matching the query
 *
 * Not supported for this query type: it must be converted with rewrite()
 * before searching, so this method always throws.
 *
 * @return array
 * @throws Zend_Search_Lucene_Exception
 */
public function matchedDocs()
{
    // The message previously said "Wildcard query", which misidentified this (fuzzy) query type.
    throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
 * Score specified document
 *
 * Not supported for this query type: it must be converted with rewrite()
 * before searching, so this method always throws.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 * @throws Zend_Search_Lucene_Exception
 */
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    // The message previously said "Wildcard query", which misidentified this (fuzzy) query type.
    throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
 * Highlight query terms
 *
 * Collects the text of every matched term and asks the document to
 * highlight those words with the next highlight color.
 *
 * @param Zend_Search_Lucene_Document_Html $doc
 * @param integer &$colorIndex
 */
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
{
    $words = array();

    // getQueryTerms() treats a null $_matches as "not searched yet". Iterating
    // null would raise a PHP warning, so it is handled here as "no matched words".
    if ($this->_matches !== null) {
        foreach ($this->_matches as $term) {
            $words[] = $term->text;
        }
    }

    // The highlight call is kept even for an empty word list so that the
    // color index is consumed exactly as before.
    $doc->highlight($words, $this->_getHighlightColor($colorIndex));
}
/**
 * Print a query
 *
 * Produces "[field:]text~[similarity]". The similarity is appended only
 * when it differs from the default one. The result is meant for query
 * visualisation only, so special characters are not escaped.
 *
 * @return string
 */
public function __toString()
{
    $fieldPrefix = '';
    if ($this->_term->field !== null) {
        $fieldPrefix = $this->_term->field . ':';
    }

    $similaritySuffix = '';
    if ($this->_minimumSimilarity != self::DEFAULT_MIN_SIMILARITY) {
        $similaritySuffix = round($this->_minimumSimilarity, 4);
    }

    return $fieldPrefix . $this->_term->text . '~' . $similaritySuffix;
}
}
PK �HH[��8��h �h Query/Boolean.phpnu &1i� <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query */
require_once 'Zend/Search/Lucene/Search/Query.php';
/** Zend_Search_Lucene_Search_Weight_Boolean */
require_once 'Zend/Search/Lucene/Search/Weight/Boolean.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_Query
{
/**
* Subqueries
* Array of Zend_Search_Lucene_Search_Query
*
* Keys are not guaranteed to be sequential: optimize() may pass an array
* with removed entries to the constructor, which stores it as is.
*
* @var array
*/
private $_subqueries = array();
/**
* Subqueries signs, indexed by the same keys as $_subqueries.
* If true then subquery is required.
* If false then subquery is prohibited.
* If null then subquery is neither prohibited, nor required
*
* If the property itself is null then all subqueries are required
*
* @var array|null
*/
private $_signs = array();
/**
* Result vector.
*
* Null until the query has been executed; afterwards an array with
* document ids as keys.
*
* @var array|null
*/
private $_resVector = null;
/**
* A score factor based on the fraction of all query subqueries
* that a document contains.
* float for conjunction queries
* array of float for non conjunction queries
* (indexed by the number of matched subqueries)
*
* Null until the first score calculation.
*
* @var mixed
*/
private $_coord = null;
/**
 * Class constructor. Create a new Boolean query object.
 *
 * When the $signs array is omitted (or contains nothing but TRUE) all
 * subqueries are treated as required and the signs list is stored as null.
 * This differs from addSubquery() behavior, but should never be used.
 *
 * @param array $subqueries Array of Zend_Search_Lucene_Search_Query objects
 * @param array $signs      Array of signs. Sign is boolean|null.
 * @return void
 */
public function __construct($subqueries = null, $signs = null)
{
    if (!is_array($subqueries)) {
        // Nothing supplied: keep the default (empty) state
        return;
    }

    $this->_subqueries = $subqueries;
    $this->_signs      = null;

    if (!is_array($signs)) {
        return;
    }

    // Keep the signs list only if at least one subquery is not required
    foreach ($signs as $sign) {
        if ($sign !== true) {
            $this->_signs = $signs;
            return;
        }
    }
}
/**
 * Add a $subquery (Zend_Search_Lucene_Search_Query) to this query.
 *
 * The sign is specified as:
 *     TRUE  - subquery is required
 *     FALSE - subquery is prohibited
 *     NULL  - subquery is neither prohibited, nor required
 *
 * While every subquery is required the signs list stays null; it is
 * expanded into a real array as soon as a non-required subquery arrives.
 *
 * @param Zend_Search_Lucene_Search_Query $subquery
 * @param boolean|null $sign
 * @return void
 */
public function addSubquery(Zend_Search_Lucene_Search_Query $subquery, $sign=null) {
    $allRequiredSoFar = ($this->_signs === null);

    // Nothing has to be recorded if the "all required" shortcut still holds
    if (!($allRequiredSoFar && $sign === true)) {
        if ($allRequiredSoFar) {
            // Expand the shortcut: every previously added subquery was required
            $this->_signs = array();
            foreach ($this->_subqueries as $existingSubquery) {
                $this->_signs[] = true;
            }
        }

        $this->_signs[] = $sign;
    }

    $this->_subqueries[] = $subquery;
}
/**
 * Re-write queries into primitive queries
 *
 * Builds a new boolean query with the same boost whose subqueries are the
 * rewritten versions of this query's subqueries, keeping their signs.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    $rewritten = new Zend_Search_Lucene_Search_Query_Boolean();
    $rewritten->setBoost($this->getBoost());

    foreach ($this->_subqueries as $position => $subquery) {
        $rewrittenSubquery = $subquery->rewrite($index);

        // A null signs list means that every subquery is required
        if ($this->_signs === null) {
            $sign = true;
        } else {
            $sign = $this->_signs[$position];
        }

        $rewritten->addSubquery($rewrittenSubquery, $sign);
    }

    return $rewritten;
}
/**
* Optimize query in the context of specified index
*
* Steps performed, in order:
*  1. every subquery is optimized and insignificant ones are dropped
*     (an Insignificant query is returned if nothing is left or if
*     everything left is prohibited);
*  2. empty subqueries are handled: a required empty subquery makes the
*     whole query Empty, optional/prohibited empty subqueries are dropped
*     (an Empty query is returned if nothing is left or if everything
*     left is prohibited);
*  3. a single remaining subquery is returned directly (cloned, with the
*     boost multiplied, when this query's boost is not 1);
*  4. Term and MultiTerm subqueries are decomposed into terms where the
*     signs allow it and regrouped into Term/MultiTerm clauses.
*
* @param Zend_Search_Lucene_Interface $index
* @return Zend_Search_Lucene_Search_Query
*/
public function optimize(Zend_Search_Lucene_Interface $index)
{
$subqueries = array();
$signs = array();
// Optimize all subqueries
foreach ($this->_subqueries as $id => $subquery) {
$subqueries[] = $subquery->optimize($index);
$signs[] = ($this->_signs === null)? true : $this->_signs[$id];
}
// Remove insignificant subqueries
foreach ($subqueries as $id => $subquery) {
if ($subquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
// Insignificant subquery has to be removed anyway
unset($subqueries[$id]);
unset($signs[$id]);
}
}
if (count($subqueries) == 0) {
// Boolean query has no non-insignificant subqueries
return new Zend_Search_Lucene_Search_Query_Insignificant();
}
// Check if all non-insignificant subqueries are prohibited
$allProhibited = true;
foreach ($signs as $sign) {
if ($sign !== false) {
$allProhibited = false;
break;
}
}
if ($allProhibited) {
return new Zend_Search_Lucene_Search_Query_Insignificant();
}
// Check for empty subqueries
foreach ($subqueries as $id => $subquery) {
if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
if ($signs[$id] === true) {
// Matching is required, but is actually empty
return new Zend_Search_Lucene_Search_Query_Empty();
} else {
// Matching is optional or prohibited, but is empty
// Remove it from subqueries and signs list
unset($subqueries[$id]);
unset($signs[$id]);
}
}
}
// Check, if reduced subqueries list is empty
if (count($subqueries) == 0) {
return new Zend_Search_Lucene_Search_Query_Empty();
}
// Check if all non-empty subqueries are prohibited
$allProhibited = true;
foreach ($signs as $sign) {
if ($sign !== false) {
$allProhibited = false;
break;
}
}
if ($allProhibited) {
return new Zend_Search_Lucene_Search_Query_Empty();
}
// Check, if reduced subqueries list has only one entry
if (count($subqueries) == 1) {
// It's a query with only one required or optional clause
// (it's already checked, that it's not a prohibited clause)
if ($this->getBoost() == 1) {
return reset($subqueries);
}
// Clone, so the boost of the original subquery object is left untouched
$optimizedQuery = clone reset($subqueries);
$optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());
return $optimizedQuery;
}
// Prepare first candidate for optimized query
// (note: $subqueries and $signs may have gaps in their keys at this point)
$optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
$optimizedQuery->setBoost($this->getBoost());
$terms = array();
$tsigns = array();
$boostFactors = array();
// Try to decompose term and multi-term subqueries
foreach ($subqueries as $id => $subquery) {
if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
$terms[] = $subquery->getTerm();
$tsigns[] = $signs[$id];
$boostFactors[] = $subquery->getBoost();
// remove subquery from a subqueries list
unset($subqueries[$id]);
unset($signs[$id]);
} else if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
$subTerms = $subquery->getTerms();
$subSigns = $subquery->getSigns();
if ($signs[$id] === true) {
// It's a required multi-term subquery.
// Something like '... +(+term1 -term2 term3 ...) ...'
// Multi-term required subquery can be decomposed only if it contains
// required terms and doesn't contain prohibited terms:
// ... +(+term1 term2 ...) ... => ... +term1 term2 ...
//
// Check this
$hasRequired = false;
$hasProhibited = false;
if ($subSigns === null) {
// All subterms are required
$hasRequired = true;
} else {
foreach ($subSigns as $sign) {
if ($sign === true) {
$hasRequired = true;
} else if ($sign === false) {
$hasProhibited = true;
break;
}
}
}
// Continue if subquery has prohibited terms or doesn't have required terms
if ($hasProhibited || !$hasRequired) {
continue;
}
foreach ($subTerms as $termId => $term) {
$terms[] = $term;
$tsigns[] = ($subSigns === null)? true : $subSigns[$termId];
$boostFactors[] = $subquery->getBoost();
}
// remove subquery from a subqueries list
unset($subqueries[$id]);
unset($signs[$id]);
} else { // $signs[$id] === null || $signs[$id] === false
// It's an optional or prohibited multi-term subquery.
// Something like '... (+term1 -term2 term3 ...) ...'
// or
// something like '... -(+term1 -term2 term3 ...) ...'
// Multi-term optional and prohibited subqueries can be decomposed
// only if all terms are optional.
//
// Check if all terms are optional.
$onlyOptional = true;
if ($subSigns === null) {
// All subterms are required
$onlyOptional = false;
} else {
foreach ($subSigns as $sign) {
if ($sign !== null) {
$onlyOptional = false;
break;
}
}
}
// Continue if non-optional terms are present in this multi-term subquery
if (!$onlyOptional) {
continue;
}
// Every decomposed term inherits the sign of the whole subquery
foreach ($subTerms as $termId => $term) {
$terms[] = $term;
$tsigns[] = ($signs[$id] === null)? null /* optional */ :
false /* prohibited */;
$boostFactors[] = $subquery->getBoost();
}
// remove subquery from a subqueries list
unset($subqueries[$id]);
unset($signs[$id]);
}
}
}
// Check, if there are no decomposed subqueries
if (count($terms) == 0 ) {
// return prepared candidate
return $optimizedQuery;
}
// Check, if all subqueries have been decomposed and all terms have the same boost factor
if (count($subqueries) == 0 && count(array_unique($boostFactors)) == 1) {
$optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
$optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());
return $optimizedQuery;
}
// This boolean query can't be transformed to Term/MultiTerm query and still contains
// several subqueries
// Separate prohibited terms
$prohibitedTerms = array();
foreach ($terms as $id => $term) {
if ($tsigns[$id] === false) {
$prohibitedTerms[] = $term;
unset($terms[$id]);
unset($tsigns[$id]);
unset($boostFactors[$id]);
}
}
if (count($terms) == 1) {
$clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
$clause->setBoost(reset($boostFactors));
$subqueries[] = $clause;
$signs[] = reset($tsigns);
// Clear terms list
$terms = array();
} else if (count($terms) > 1 && count(array_unique($boostFactors)) == 1) {
$clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
$clause->setBoost(reset($boostFactors));
$subqueries[] = $clause;
// Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
$signs[] = (in_array(true, $tsigns))? true : null;
// Clear terms list
$terms = array();
}
if (count($prohibitedTerms) == 1) {
// (boost factors are not significant for prohibited clauses)
$subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
$signs[] = false;
// Clear prohibited terms list
$prohibitedTerms = array();
} else if (count($prohibitedTerms) > 1) {
// prepare signs array
$prohibitedSigns = array();
foreach ($prohibitedTerms as $id => $term) {
// all prohibited terms are grouped as optional into multi-term query
$prohibitedSigns[$id] = null;
}
// (boost factors are not significant for prohibited clauses)
$subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
// Clause sign is 'prohibited'
$signs[] = false;
// Clear terms list
$prohibitedTerms = array();
}
/** @todo Group terms with the same boost factors together */
// Check, that all terms are processed
// Replace candidate for optimized query
// (if several terms with different boost factors are left unprocessed,
// the candidate prepared before the decomposition is returned instead)
if (count($terms) == 0 && count($prohibitedTerms) == 0) {
$optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
$optimizedQuery->setBoost($this->getBoost());
}
return $optimizedQuery;
}
/**
* Returns subqueries
*
* The array is returned as stored; its keys correspond to the keys of
* the array returned by getSigns() when that one is not null.
*
* @return array Array of Zend_Search_Lucene_Search_Query objects
*/
public function getSubqueries()
{
return $this->_subqueries;
}
/**
* Return subqueries signs
*
* Each sign is true (required), false (prohibited) or null (optional).
* The returned value itself is null when all subqueries are required.
*
* @return array|null
*/
public function getSigns()
{
return $this->_signs;
}
/**
 * Constructs an appropriate Weight implementation for this query.
 *
 * The created weight object is also remembered in $this->_weight.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @return Zend_Search_Lucene_Search_Weight
 */
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
    $weight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);
    $this->_weight = $weight;

    return $weight;
}
/**
 * Calculate result vector for Conjunction query
 * (like '<subquery1> AND <subquery2> AND <subquery3>')
 *
 * The result is the intersection (by document id key) of the matched
 * documents of all subqueries, stored into $this->_resVector.
 */
private function _calculateConjunctionResult()
{
    // An empty subqueries list produces an empty result, otherwise start undefined
    $this->_resVector = (count($this->_subqueries) == 0) ? array() : null;

    $docVectors     = array();
    $docVectorSizes = array();
    $docVectorIds   = array(); // secondary sort key, prevents comparison of the arrays themselves

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $matched = $subquery->matchedDocs();

        $docVectors[]     = $matched;
        $docVectorSizes[] = count($matched);
        $docVectorIds[]   = $subqueryId;
    }

    // Process the smallest vectors first
    array_multisort($docVectorSizes, SORT_ASC, SORT_NUMERIC,
                    $docVectorIds,   SORT_ASC, SORT_NUMERIC,
                    $docVectors);

    foreach ($docVectors as $candidate) {
        if ($this->_resVector === null) {
            $this->_resVector = $candidate;
        } else {
            // Manual key intersection, used instead of array_intersect_key()
            // as a workaround for its slowness problem.
            $intersection = array();
            foreach ($this->_resVector as $docId => $value) {
                if (isset($candidate[$docId])) {
                    $intersection[$docId] = $value;
                }
            }
            $this->_resVector = $intersection;
        }

        if (count($this->_resVector) == 0) {
            // Nothing is left, the remaining subqueries can't change that
            break;
        }
    }

    // No ksort() is needed here: the intersection keeps the elements order
}
/**
 * Calculate result vector for non Conjunction query
 * (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
 *
 * If there are required subqueries, the result is the intersection of
 * their matched documents; otherwise it is the union of the matched
 * documents of the optional subqueries. Prohibited subqueries are not
 * taken into account here.
 */
private function _calculateNonConjunctionResult()
{
    $requiredVectors      = array();
    $requiredVectorsSizes = array();
    $requiredVectorsIds   = array(); // secondary sort key, prevents comparison of the arrays themselves
    $optional             = array();

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $sign = $this->_signs[$subqueryId];

        if ($sign === false) {
            // Prohibited: skipped on purpose. matchedDocs() may include non-matching
            // ids, and calculating the prohibited vector may take significant time
            // without affecting the result.
            continue;
        }

        $matched = $subquery->matchedDocs();

        if ($sign === true) {
            // Required
            $requiredVectors[]      = $matched;
            $requiredVectorsSizes[] = count($matched);
            $requiredVectorsIds[]   = $subqueryId;
        } else {
            // Neither required, nor prohibited: array union
            $optional += $matched;
        }
    }

    // Process the smallest required vectors first
    array_multisort($requiredVectorsSizes, SORT_ASC, SORT_NUMERIC,
                    $requiredVectorsIds,   SORT_ASC, SORT_NUMERIC,
                    $requiredVectors);

    $required = null;
    foreach ($requiredVectors as $candidate) {
        if ($required === null) {
            $required = $candidate;
        } else {
            // Manual key intersection, used instead of array_intersect_key()
            // as a workaround for its slowness problem.
            $intersection = array();
            foreach ($required as $docId => $value) {
                if (isset($candidate[$docId])) {
                    $intersection[$docId] = $value;
                }
            }
            $required = $intersection;
        }

        if (count($required) == 0) {
            // Nothing is left, the remaining subqueries can't change that
            break;
        }
    }

    // Reference binding is kept exactly as in the original implementation
    if ($required === null) {
        $this->_resVector = &$optional;
    } else {
        $this->_resVector = &$required;
    }

    ksort($this->_resVector, SORT_NUMERIC);
}
/**
 * Score calculator for conjunction queries (all subqueries are required)
 *
 * Returns 0 as soon as one subquery doesn't match the document. Otherwise
 * the subquery scores (each multiplied by the coord factor) are summed and
 * the sum is multiplied by the coord factor and by the query boost.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    if ($this->_coord === null) {
        // All subqueries have to match, so overlap equals to the subqueries count
        $subqueryCount = count($this->_subqueries);
        $this->_coord  = $reader->getSimilarity()->coord($subqueryCount, $subqueryCount);
    }

    $score = 0;

    foreach ($this->_subqueries as $subquery) {
        $subscore = $subquery->score($docId, $reader);

        if ($subscore == 0) {
            return 0;
        }

        // Reuse the value computed above instead of scoring the subquery a second time
        $score += $subscore * $this->_coord;
    }

    // NOTE(review): coord is applied per subquery above and once more here;
    // this is kept unchanged to preserve the existing scoring.
    return $score * $this->_coord * $this->getBoost();
}
/**
 * Score calculator for non conjunction queries (not all subqueries are required)
 *
 * Returns 0 if a prohibited subquery matches the document or a required
 * one doesn't. Otherwise the scores of the matching subqueries are summed
 * and multiplied by the coord factor for the number of matched subqueries
 * and by the query boost.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
{
    if ($this->_coord === null) {
        // Lazily build the coord factors table, indexed by the matched subqueries count
        $this->_coord = array();

        $nonProhibitedCount = 0;
        foreach ($this->_signs as $sign) {
            if ($sign !== false /* not prohibited */) {
                $nonProhibitedCount++;
            }
        }

        for ($overlap = 0; $overlap <= $nonProhibitedCount; $overlap++) {
            $this->_coord[$overlap] = $reader->getSimilarity()->coord($overlap, $nonProhibitedCount);
        }
    }

    $total        = 0;
    $matchedCount = 0;

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $subscore = $subquery->score($docId, $reader);
        $matches  = ($subscore != 0);

        // Prohibited, but matches
        if ($this->_signs[$subqueryId] === false && $matches) {
            return 0;
        }

        // Required, but doesn't match
        if ($this->_signs[$subqueryId] === true && !$matches) {
            return 0;
        }

        if ($matches) {
            $matchedCount++;
            $total += $subscore;
        }
    }

    return $total * $this->_coord[$matchedCount] * $this->getBoost();
}
/**
 * Execute query in context of index reader
 * It also initializes necessary internal structures
 *
 * Required subqueries are executed with the shared documents filter, all
 * the other ones without a filter. The result vector is calculated
 * afterwards.
 *
 * @param Zend_Search_Lucene_Interface $reader
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 */
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
    // Initialize weight if it's not done yet
    $this->_initWeight($reader);

    // Create local documents filter if it's not provided by upper query
    if ($docsFilter === null) {
        $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
    }

    foreach ($this->_subqueries as $subqueryId => $subquery) {
        $isRequired = ($this->_signs == null || $this->_signs[$subqueryId] === true);

        if ($isRequired) {
            $subquery->execute($reader, $docsFilter);
        } else {
            $subquery->execute($reader);
        }
    }

    if ($this->_signs !== null) {
        $this->_calculateNonConjunctionResult();
    } else {
        $this->_calculateConjunctionResult();
    }
}
/**
* Get document ids likely matching the query
*
* It's an array with document ids as keys (performance considerations)
*
* Returns the result vector as is; it is null until execute() has
* calculated it.
*
* @return array|null
*/
public function matchedDocs()
{
return $this->_resVector;
}
/**
 * Score specified document
 *
 * Documents which are not present in the result vector get 0. The others
 * are scored as a conjunction (all subqueries required) or a non
 * conjunction query, depending on the signs list.
 *
 * @param integer $docId
 * @param Zend_Search_Lucene_Interface $reader
 * @return float
 */
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
    if (!isset($this->_resVector[$docId])) {
        return 0;
    }

    return ($this->_signs === null)
         ? $this->_conjunctionScore($docId, $reader)
         : $this->_nonConjunctionScore($docId, $reader);
}
/**
 * Return query terms
 *
 * Terms of all required and optional subqueries are merged into one list;
 * prohibited subqueries are left out.
 *
 * @return array
 */
public function getQueryTerms()
{
    $collected = array();

    foreach ($this->_subqueries as $id => $subquery) {
        // Skip prohibited subqueries
        if ($this->_signs !== null && $this->_signs[$id] === false) {
            continue;
        }

        $collected = array_merge($collected, $subquery->getQueryTerms());
    }

    return $collected;
}
/**
 * Highlight query terms
 *
 * Delegates highlighting to every required and optional subquery;
 * prohibited subqueries are left out.
 *
 * @param Zend_Search_Lucene_Document_Html $doc
 * @param integer &$colorIndex
 */
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
{
    foreach ($this->_subqueries as $id => $subquery) {
        // Skip prohibited subqueries
        if ($this->_signs !== null && $this->_signs[$id] === false) {
            continue;
        }

        $subquery->highlightMatchesDOM($doc, $colorIndex);
    }
}
/**
 * Print a query
 *
 * Every subquery is printed as "[+|-](<subquery>)[^boost]" and the parts
 * are separated by a single space. The result is meant for query
 * visualisation only, so special characters are not escaped.
 *
 * @return string
 */
public function __toString()
{
    $parts = array();

    foreach ($this->_subqueries as $id => $subquery) {
        $part = '';

        if ($this->_signs === null || $this->_signs[$id] === true) {
            $part .= '+';
        } else if ($this->_signs[$id] === false) {
            $part .= '-';
        }

        $part .= '(' . $subquery->__toString() . ')';

        if ($subquery->getBoost() != 1) {
            $part .= '^' . round($subquery->getBoost(), 4);
        }

        $parts[] = $part;
    }

    // Join by position instead of testing the array key against 0: the subqueries
    // array may have gaps in its keys (optimize() hands such arrays to the
    // constructor), which used to produce a leading space.
    return implode(' ', $parts);
}
}
PK �HH[o|��.&