[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[sup-devel] [PATCH] fix handling of multiple label: terms in search
By default Xapian joins boolean query terms that share the same prefix with OR
instead of AND, so searching for multiple labels (e.g. "label:foo label:bar")
returns messages carrying either label rather than only those carrying both.
By making use of the "exclusive" parameter to add_boolean_prefix (added in
Xapian 1.2) we can tell Xapian to use OR only for prefixes whose values are
mutually exclusive, i.e. where a message can carry at most one term with that
prefix.
Signed-off-by: Sascha Silbe <sascha-pgp@silbe.org>
---
lib/sup/index.rb | 74 +++++++++++++++++++++++++++---------------------------
1 files changed, 37 insertions(+), 37 deletions(-)
Tested on Debian Squeeze with Ruby 1.8.7.302 and Xapian 1.2.3.
diff --git a/lib/sup/index.rb b/lib/sup/index.rb
index 9273f18..a72bec6 100644
--- a/lib/sup/index.rb
+++ b/lib/sup/index.rb
@@ -419,8 +419,8 @@ EOS
qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
qp.default_op = Xapian::Query::OP_AND
qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 'date:', true))
- NORMAL_PREFIX.each { |k,vs| vs.each { |v| qp.add_prefix k, v } }
- BOOLEAN_PREFIX.each { |k,vs| vs.each { |v| qp.add_boolean_prefix k, v } }
+ NORMAL_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_prefix k, v } }
+ BOOLEAN_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_boolean_prefix k, v, info[:exclusive] } }
begin
xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD)
@@ -471,31 +471,31 @@ EOS
# Stemmed
NORMAL_PREFIX = {
- 'subject' => 'S',
- 'body' => 'B',
- 'from_name' => 'FN',
- 'to_name' => 'TN',
- 'name' => %w(FN TN),
- 'attachment' => 'A',
- 'email_text' => 'E',
- '' => %w(S B FN TN A E),
+ 'subject' => {:prefix => 'S', :exclusive => false},
+ 'body' => {:prefix => 'B', :exclusive => false},
+ 'from_name' => {:prefix => 'FN', :exclusive => false},
+ 'to_name' => {:prefix => 'TN', :exclusive => false},
+ 'name' => {:prefix => %w(FN TN), :exclusive => false},
+ 'attachment' => {:prefix => 'A', :exclusive => false},
+ 'email_text' => {:prefix => 'E', :exclusive => false},
+ '' => {:prefix => %w(S B FN TN A E), :exclusive => false},
}
# Unstemmed
BOOLEAN_PREFIX = {
- 'type' => 'K',
- 'from_email' => 'FE',
- 'to_email' => 'TE',
- 'email' => %w(FE TE),
- 'date' => 'D',
- 'label' => 'L',
- 'source_id' => 'I',
- 'attachment_extension' => 'O',
- 'msgid' => 'Q',
- 'id' => 'Q',
- 'thread' => 'H',
- 'ref' => 'R',
- 'location' => 'J',
+ 'type' => {:prefix => 'K', :exclusive => true},
+ 'from_email' => {:prefix => 'FE', :exclusive => false},
+ 'to_email' => {:prefix => 'TE', :exclusive => false},
+ 'email' => {:prefix => %w(FE TE), :exclusive => false},
+ 'date' => {:prefix => 'D', :exclusive => true},
+ 'label' => {:prefix => 'L', :exclusive => false},
+ 'source_id' => {:prefix => 'I', :exclusive => true},
+ 'attachment_extension' => {:prefix => 'O', :exclusive => false},
+ 'msgid' => {:prefix => 'Q', :exclusive => true},
+ 'id' => {:prefix => 'Q', :exclusive => true},
+ 'thread' => {:prefix => 'H', :exclusive => false},
+ 'ref' => {:prefix => 'R', :exclusive => false},
+ 'location' => {:prefix => 'J', :exclusive => false},
}
PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX
@@ -661,8 +661,8 @@ EOS
# Person names are indexed with several prefixes
person_termer = lambda do |d|
lambda do |p|
- doc.index_text p.name, PREFIX["#{d}_name"] if p.name
- doc.index_text p.email, PREFIX['email_text']
+ doc.index_text p.name, PREFIX["#{d}_name"][:prefix] if p.name
+ doc.index_text p.email, PREFIX['email_text'][:prefix]
doc.add_term mkterm(:email, d, p.email)
end
end
@@ -673,9 +673,9 @@ EOS
# Full text search content
subject_text = m.indexable_subject
body_text = m.indexable_body
- doc.index_text subject_text, PREFIX['subject']
- doc.index_text body_text, PREFIX['body']
- m.attachments.each { |a| doc.index_text a, PREFIX['attachment'] }
+ doc.index_text subject_text, PREFIX['subject'][:prefix]
+ doc.index_text body_text, PREFIX['body'][:prefix]
+ m.attachments.each { |a| doc.index_text a, PREFIX['attachment'][:prefix] }
# Miscellaneous terms
doc.add_term mkterm(:date, m.date) if m.date
@@ -753,25 +753,25 @@ EOS
def mkterm type, *args
case type
when :label
- PREFIX['label'] + args[0].to_s.downcase
+ PREFIX['label'][:prefix] + args[0].to_s.downcase
when :type
- PREFIX['type'] + args[0].to_s.downcase
+ PREFIX['type'][:prefix] + args[0].to_s.downcase
when :date
- PREFIX['date'] + args[0].getutc.strftime("%Y%m%d%H%M%S")
+ PREFIX['date'][:prefix] + args[0].getutc.strftime("%Y%m%d%H%M%S")
when :email
case args[0]
- when :from then PREFIX['from_email']
- when :to then PREFIX['to_email']
+ when :from then PREFIX['from_email'][:prefix]
+ when :to then PREFIX['to_email'][:prefix]
else raise "Invalid email term type #{args[0]}"
end + args[1].to_s.downcase
when :source_id
- PREFIX['source_id'] + args[0].to_s.downcase
+ PREFIX['source_id'][:prefix] + args[0].to_s.downcase
when :location
- PREFIX['location'] + [args[0]].pack('n') + args[1].to_s
+ PREFIX['location'][:prefix] + [args[0]].pack('n') + args[1].to_s
when :attachment_extension
- PREFIX['attachment_extension'] + args[0].to_s.downcase
+ PREFIX['attachment_extension'][:prefix] + args[0].to_s.downcase
when :msgid, :ref, :thread
- PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)]
+ PREFIX[type.to_s][:prefix] + args[0][0...(MAX_TERM_LENGTH-1)]
else
raise "Invalid term type #{type}"
end
--
1.7.1
_______________________________________________
Sup-devel mailing list
Sup-devel@rubyforge.org
http://rubyforge.org/mailman/listinfo/sup-devel