Current File : /home/k/a/r/karenpetzb/www/items/category/Term.php.tar |
home/karenpetzb/library/Zend/Search/Lucene/Search/Weight/Term.php 0000604 00000006000 15071363755 0020705 0 ustar 00 <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Weight */
require_once 'Zend/Search/Lucene/Search/Weight.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Weight_Term extends Zend_Search_Lucene_Search_Weight
{
    /**
     * Index reader; queried for its similarity implementation.
     *
     * @var Zend_Search_Lucene_Interface
     */
    private $_reader;

    /**
     * Term this weight is calculated for.
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_term;

    /**
     * Query this weight belongs to; its boost scales the query weight.
     *
     * @var Zend_Search_Lucene_Search_Query
     */
    private $_query;

    /**
     * Inverse document frequency factor, set by sumOfSquaredWeights().
     *
     * @var float
     */
    private $_idf;

    /**
     * Query weight (idf * boost, later multiplied by the query norm).
     *
     * @var float
     */
    private $_queryWeight;

    /**
     * Stores the collaborators needed to compute the weight later on.
     *
     * @param Zend_Search_Lucene_Index_Term   $term
     * @param Zend_Search_Lucene_Search_Query $query
     * @param Zend_Search_Lucene_Interface    $reader  index reader
     */
    public function __construct(Zend_Search_Lucene_Index_Term $term,
                                Zend_Search_Lucene_Search_Query $query,
                                Zend_Search_Lucene_Interface $reader)
    {
        $this->_reader = $reader;
        $this->_query  = $query;
        $this->_term   = $term;
    }

    /**
     * Computes idf and the raw query weight, and returns the weight squared.
     *
     * Side effect: populates $_idf and $_queryWeight, which normalize() reads.
     *
     * @return float
     */
    public function sumOfSquaredWeights()
    {
        $similarity = $this->_reader->getSimilarity();

        // idf of the term in the context of this reader
        $this->_idf = $similarity->idf($this->_term, $this->_reader);

        // raw query weight: idf scaled by the query boost
        $this->_queryWeight = $this->_idf * $this->_query->getBoost();

        $weight = $this->_queryWeight;

        return $weight * $weight;
    }

    /**
     * Applies the query normalization factor to this weight.
     *
     * Reads $_idf and $_queryWeight as left by sumOfSquaredWeights().
     * $_queryNorm and $_value are not declared in this class; presumably
     * they are members of the parent weight class (TODO confirm).
     *
     * @param float $queryNorm
     */
    public function normalize($queryNorm)
    {
        $this->_queryNorm = $queryNorm;

        // scale the query weight by the normalization factor
        $this->_queryWeight = $this->_queryWeight * $queryNorm;

        // final value applies idf a second time (document side)
        $this->_value = $this->_queryWeight * $this->_idf;
    }
}
home/karenpetzb/library/Zend/Search/Lucene/Search/Query/Term.php 0000604 00000013522 15071437235 0020565 0 ustar 00 <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query */
require_once 'Zend/Search/Lucene/Search/Query.php';
/** Zend_Search_Lucene_Search_Weight_Term */
require_once 'Zend/Search/Lucene/Search/Weight/Term.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Query
{
    /**
     * Term to find.
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_term;

    /**
     * Documents vector: document ids as keys, filled by execute().
     * Stays null until execute() has been called.
     *
     * @var array|null
     */
    private $_docVector = null;

    /**
     * Term freqs vector, filled by execute().
     * array(docId => freq, ...)
     *
     * @var array
     */
    private $_termFreqs;

    /**
     * Zend_Search_Lucene_Search_Query_Term constructor
     *
     * @param Zend_Search_Lucene_Index_Term $term
     */
    public function __construct(Zend_Search_Lucene_Index_Term $term)
    {
        $this->_term = $term;
    }

    /**
     * Re-write query into primitive queries in the context of specified index.
     *
     * A term bound to a field is already primitive and is returned as is.
     * A term without a field is expanded into a multi-term query holding one
     * term per name returned by $index->getFieldNames(true), carrying over
     * this query's boost, and that query is rewritten in turn.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        // NOTE(review): the loose comparison also treats '' and 0 as "no field";
        // kept unchanged to preserve existing behaviour.
        if ($this->_term->field != null) {
            return $this;
        }

        $query = new Zend_Search_Lucene_Search_Query_MultiTerm();
        $query->setBoost($this->getBoost());

        foreach ($index->getFieldNames(true) as $fieldName) {
            $term = new Zend_Search_Lucene_Index_Term($this->_term->text, $fieldName);
            $query->addTerm($term);
        }

        return $query->rewrite($index);
    }

    /**
     * Optimize query in the context of specified index.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query  an empty query when the index
     *                                          does not contain the term,
     *                                          otherwise this query
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        // Check, that index contains specified term
        if (!$index->hasTerm($this->_term)) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        return $this;
    }

    /**
     * Constructs an appropriate Weight implementation for this query
     * and remembers it in $_weight.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        $this->_weight = new Zend_Search_Lucene_Search_Weight_Term($this->_term, $this, $reader);
        return $this->_weight;
    }

    /**
     * Execute query in context of index reader.
     * It also initializes necessary internal structures.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     */
    public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
    {
        // flip so that document ids become keys (isset() lookups in score())
        $this->_docVector = array_flip($reader->termDocs($this->_term, $docsFilter));
        $this->_termFreqs = $reader->termFreqs($this->_term, $docsFilter);

        // Initialize weight if it's not done yet
        $this->_initWeight($reader);
    }

    /**
     * Get document ids likely matching the query.
     *
     * It's an array with document ids as keys (performance considerations).
     * Returns null if execute() has not been called yet.
     *
     * @return array|null
     */
    public function matchedDocs()
    {
        return $this->_docVector;
    }

    /**
     * Score specified document.
     *
     * For a matched document the score is
     * tf(term frequency) * weight value * field norm * query boost;
     * documents that are not in the matched set score 0.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        if (!isset($this->_docVector[$docId])) {
            return 0;
        }

        return $reader->getSimilarity()->tf($this->_termFreqs[$docId]) *
               $this->_weight->getValue() *
               $reader->norm($docId, $this->_term->field) *
               $this->getBoost();
    }

    /**
     * Return query terms
     *
     * @return array  single-element array holding the query term
     */
    public function getQueryTerms()
    {
        return array($this->_term);
    }

    /**
     * Return query term
     *
     * @return Zend_Search_Lucene_Index_Term
     */
    public function getTerm()
    {
        return $this->_term;
    }

    /**
     * Returns the query term wrapped in an array.
     *
     * This used to read a property named $_terms, which this class neither
     * declares nor assigns; the single term is returned instead so that the
     * documented array return type holds.
     *
     * @return array
     */
    public function getTerms()
    {
        return array($this->_term);
    }

    /**
     * Highlight query terms
     *
     * @param Zend_Search_Lucene_Document_Html $doc
     * @param integer &$colorIndex
     */
    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
    {
        $doc->highlight($this->_term->text, $this->_getHighlightColor($colorIndex));
    }

    /**
     * Print a query as "field:text", or just "text" when the field is null.
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping
        return (($this->_term->field === null)? '':$this->_term->field . ':') . $this->_term->text;
    }
}
home/karenpetzb/library/Zend/Search/Lucene/Index/Term.php 0000604 00000007124 15071464647 0017332 0 ustar 00 <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* A Term represents a word from text. This is the unit of search. It is
* composed of two elements, the text of the word, as a string, and the name of
* the field that the text occurred in, an interned string.
*
* Note that terms may represent more than words from text fields, but also
* things like dates, email addresses, urls, etc.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_Term
{
    /**
     * Field name or field number (depending from context)
     *
     * @var mixed
     */
    public $field;

    /**
     * Term value
     *
     * @var string
     */
    public $text;

    /**
     * Object constructor
     *
     * When $field is null the value of
     * Zend_Search_Lucene::getDefaultSearchField() is used instead.
     *
     * @param string $text   term value
     * @param mixed  $field  field name or number, or null for the default
     */
    public function __construct($text, $field = null)
    {
        $this->field = ($field === null)? Zend_Search_Lucene::getDefaultSearchField() : $field;
        $this->text  = $text;
    }

    /**
     * Returns term key: field and text joined by a NUL byte.
     *
     * @return string
     */
    public function key()
    {
        return $this->field . chr(0) . $this->text;
    }

    /**
     * Get term prefix
     *
     * Returns the leading bytes of $str that make up at most $length
     * UTF-8 characters. Scanning stops early at a character whose declared
     * byte length runs past the end of the string.
     *
     * @param string $str
     * @param integer $length  maximum number of characters in the prefix
     * @return string
     */
    public static function getPrefix($str, $length)
    {
        $totalBytes  = strlen($str);
        $prefixBytes = 0;
        $prefixChars = 0;

        while ($prefixBytes < $totalBytes && $prefixChars < $length) {
            $charBytes = self::_getCharByteLength(ord($str[$prefixBytes]));

            if ($prefixBytes + $charBytes > $totalBytes) {
                // wrong character: truncated multi-byte sequence
                break;
            }

            $prefixChars++;
            $prefixBytes += $charBytes;
        }

        return substr($str, 0, $prefixBytes);
    }

    /**
     * Get UTF-8 string length
     *
     * Counts characters, not bytes. Counting stops at a character whose
     * declared byte length runs past the end of the string.
     *
     * @param string $str
     * @return integer
     */
    public static function getLength($str)
    {
        $totalBytes = strlen($str);
        $bytes      = 0;
        $chars      = 0;

        while ($bytes < $totalBytes) {
            $charBytes = self::_getCharByteLength(ord($str[$bytes]));

            if ($bytes + $charBytes > $totalBytes) {
                // wrong character: truncated multi-byte sequence
                break;
            }

            $chars++;
            $bytes += $charBytes;
        }

        return $chars;
    }

    /**
     * Byte length of a UTF-8 character as declared by its first byte.
     *
     * Bytes without both top bits set count as one byte; otherwise the
     * 0x20 and 0x10 bits extend the length to three and four bytes.
     *
     * @param integer $leadByte  ordinal value of the first byte
     * @return integer  1 to 4
     */
    private static function _getCharByteLength($leadByte)
    {
        if (($leadByte & 0xC0) != 0xC0) {
            return 1;
        }
        if (!($leadByte & 0x20)) {
            return 2;
        }
        if (!($leadByte & 0x10)) {
            return 3;
        }

        return 4;
    }
}
home/karenpetzb/library/Zend/Search/Lucene/Search/QueryEntry/Term.php 0000604 00000013725 15071472646 0021621 0 ustar 00 <?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryEntry */
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/** Zend_Search_Lucene_Analysis_Analyzer */
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_QueryEntry_Term extends Zend_Search_Lucene_Search_QueryEntry
{
    /**
     * Raw term text as passed to the constructor (may contain '?' and '*').
     *
     * @var string
     */
    private $_term;

    /**
     * Field the term is bound to.
     *
     * @var string|null
     */
    private $_field;

    /**
     * True once the '~' modifier has been processed for this entry.
     *
     * @var boolean
     */
    private $_fuzzyQuery = false;

    /**
     * Similarity passed to the fuzzy query.
     *
     * @var float
     */
    private $_similarity = 1.;

    /**
     * Object constructor
     *
     * @param string $term
     * @param string $field
     */
    public function __construct($term, $field)
    {
        $this->_field = $field;
        $this->_term  = $term;
    }

    /**
     * Process modifier ('~')
     *
     * Marks the entry as fuzzy. The similarity becomes $parameter when one
     * is given, otherwise the fuzzy query's default minimum similarity.
     *
     * @param mixed $parameter
     */
    public function processFuzzyProximityModifier($parameter = null)
    {
        $this->_fuzzyQuery = true;
        $this->_similarity = ($parameter !== null)
                           ? $parameter
                           : Zend_Search_Lucene_Search_Query_Fuzzy::DEFAULT_MIN_SIMILARITY;
    }

    /**
     * Transform entry to a subquery
     *
     * Depending on the term text and the analyzer output this yields a
     * wildcard, insignificant, single-term, fuzzy or multi-term query.
     * The entry's boost ($_boost, not declared in this class; presumably
     * inherited from the parent entry, TODO confirm) is applied to every
     * query except the insignificant one.
     *
     * @param string $encoding
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Search_QueryParserException
     */
    public function getQuery($encoding)
    {
        $hasQuestionMark = strpos($this->_term, '?') !== false;

        if ($hasQuestionMark || strpos($this->_term, '*') !== false) {
            if ($this->_fuzzyQuery) {
                throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is not supported for terms with wildcards.');
            }

            // Rebuild the pattern: literal pieces go through the analyzer,
            // the wildcard characters are re-inserted between them.
            $pattern = '';
            foreach (explode('*', $this->_term) as $starIndex => $starPiece) {
                if ($starIndex > 0) {
                    $pattern .= '*';
                }

                foreach (explode('?', $starPiece) as $markIndex => $literal) {
                    if ($markIndex > 0) {
                        $pattern .= '?';
                    }

                    $literalTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($literal, $encoding);
                    if (count($literalTokens) > 1) {
                        throw new Zend_Search_Lucene_Search_QueryParserException('Wildcard search is supported only for non-multiple word terms');
                    }

                    foreach ($literalTokens as $literalToken) {
                        $pattern .= $literalToken->getTermText();
                    }
                }
            }

            $wildcardTerm  = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);
            $wildcardQuery = new Zend_Search_Lucene_Search_Query_Wildcard($wildcardTerm);
            $wildcardQuery->setBoost($this->_boost);

            return $wildcardQuery;
        }

        $tokens     = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_term, $encoding);
        $tokenCount = count($tokens);

        if ($tokenCount === 0) {
            // nothing survived analysis
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        if ($tokenCount === 1) {
            $singleTerm = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);

            if ($this->_fuzzyQuery) {
                $singleQuery = new Zend_Search_Lucene_Search_Query_Fuzzy($singleTerm, $this->_similarity);
            } else {
                $singleQuery = new Zend_Search_Lucene_Search_Query_Term($singleTerm);
            }
            $singleQuery->setBoost($this->_boost);

            return $singleQuery;
        }

        // More than one token from here on.
        if ($this->_fuzzyQuery) {
            throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is supported only for non-multiple word terms');
        }

        $multiQuery = new Zend_Search_Lucene_Search_Query_MultiTerm();

        /**
         * @todo Process $token->getPositionIncrement() to support stemming, synonyms and other
         * analyzer design features
         */
        foreach ($tokens as $token) {
            $subTerm = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
            $multiQuery->addTerm($subTerm, true); // all subterms are required
        }
        $multiQuery->setBoost($this->_boost);

        return $multiQuery;
    }
}