OpenTREP Logo  0.07.7
C++ Open Travel Request Parsing Library
QuerySlices.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////////////////
2 // Import section
3 // //////////////////////////////////////////////////////////////////////
4 // STL
5 #include <cassert>
6 #include <sstream>
7 #include <set>
8 // OpenTrep
13 #include <opentrep/bom/Filter.hpp>
16 
17 namespace OPENTREP {
18 
19  // //////////////////////////////////////////////////////////////////////
20  QuerySlices::QuerySlices (const Xapian::Database& iDatabase,
21  const TravelQuery_T& iQueryString,
22  const OTransliterator& iTransliterator)
23  : _database (iDatabase), _queryString (iQueryString) {
24  init (iTransliterator);
25  }
26 
27  // //////////////////////////////////////////////////////////////////////
29  }
30 
31  // //////////////////////////////////////////////////////////////////////
32  void QuerySlices::push_back (const StringPartition& iStringPartition) {
33  if (iStringPartition.empty() == false) {
34  _slices.push_back (iStringPartition);
35  }
36  }
37 
38  // //////////////////////////////////////////////////////////////////////
39  size_t QuerySlices::size() const {
40  return _slices.size();
41  }
42 
43  // //////////////////////////////////////////////////////////////////////
44  bool QuerySlices::empty() const {
45  return _slices.empty();
46  }
47 
48  // //////////////////////////////////////////////////////////////////////
50  _slices.clear();
51  }
52 
53  // //////////////////////////////////////////////////////////////////////
54  std::string QuerySlices::describeKey() const {
55  std::ostringstream oStr;
56  oStr << "";
57  return oStr.str();
58  }
59 
60  // //////////////////////////////////////////////////////////////////////
61  std::string QuerySlices::describe() const {
62  std::ostringstream oStr;
63  oStr << describeKey();
64 
65  //
66  oStr << "[ ";
67 
68  short idx_sublist = 0;
69  for (StringPartitionList_T::const_iterator itSlice = _slices.begin();
70  itSlice != _slices.end(); ++itSlice, ++idx_sublist) {
71  //
72  if (idx_sublist != 0) {
73  oStr << "; ";
74  }
75 
76  //
77  const StringPartition& lStringPartition = *itSlice;
78 
79  //
80  oStr << idx_sublist << ". " << lStringPartition;
81  }
82 
83  //
84  oStr << " ]";
85 
86  return oStr.str();
87  }
88 
89  // //////////////////////////////////////////////////////////////////////
90  void QuerySlices::toStream (std::ostream& ioOut) const {
91  ioOut << describe();
92  }
93 
94  // //////////////////////////////////////////////////////////////////////
95  void QuerySlices::fromStream (std::istream& ioIn) {
96  }
97 
109  // //////////////////////////////////////////////////////////////////////
110  static unsigned int calculateEditDistance (const TravelQuery_T& iPhrase) {
111  NbOfErrors_T oEditDistance = 2;
112 
113  const NbOfErrors_T lQueryStringSize = iPhrase.size();
114 
115  oEditDistance = lQueryStringSize / K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT;
116  return oEditDistance;
117  }
118 
122  // //////////////////////////////////////////////////////////////////////
123  bool doesMatch (const Xapian::Database& iDatabase,
124  const std::string& iWord1, const std::string& iWord2) {
125  bool oDoesMatch = false;
126 
127  //
128  std::ostringstream oStr;
129  oStr << iWord1 << " " << iWord2;
130  const std::string lQueryString (oStr.str());
131 
132  // Catch any Xapian::Error exceptions thrown
133  Xapian::MSet lMatchingSet;
134  try {
135 
136  // Build the query object
137  Xapian::QueryParser lQueryParser;
138  lQueryParser.set_database (iDatabase);
139 
145  // lQueryParser.set_default_op (Xapian::Query::OP_ADJ);
146  lQueryParser.set_default_op (Xapian::Query::OP_PHRASE);
147 
148  // DEBUG
149  /*
150  OPENTREP_LOG_DEBUG ("Query parser `" << lQueryParser.get_description()
151  << "'");
152  */
153 
154  // DEBUG
155  // OPENTREP_LOG_DEBUG (" --------");
156 
157  // Start an enquire session
158  Xapian::Enquire enquire (iDatabase);
159 
166  const Xapian::Query& lXapianQuery =
167  lQueryParser.parse_query (lQueryString,
168  Xapian::QueryParser::FLAG_BOOLEAN
169  | Xapian::QueryParser::FLAG_PHRASE
170  | Xapian::QueryParser::FLAG_LOVEHATE);
171 
172  // Give the query object to the enquire session
173  enquire.set_query (lXapianQuery);
174 
175  // Get the top 20 results of the query
176  lMatchingSet = enquire.get_mset (0, 20);
177 
178  // Display the results
179  int nbMatches = lMatchingSet.size();
180 
181  // DEBUG
182  /*
183  OPENTREP_LOG_DEBUG (" Query string: `" << lQueryString
184  << "', i.e.: `" << lXapianQuery.get_description()
185  << "' => " << nbMatches << " result(s) found");
186  */
187 
188  if (nbMatches != 0) {
189  // There has been a matching
190  oDoesMatch = true;
191 
192  // DEBUG
193  /*
194  OPENTREP_LOG_DEBUG (" Query string: `" << lQueryString
195  << "' provides " << nbMatches << " exact matches.");
196  */
197 
198  return oDoesMatch;
199  }
200  assert (lMatchingSet.empty() == true);
201 
207  const NbOfErrors_T& lAllowableEditDistance =
208  calculateEditDistance (lQueryString);
209 
210  // Let Xapian find a spelling correction (if any)
211  const std::string& lCorrectedString =
212  iDatabase.get_spelling_suggestion (lQueryString, lAllowableEditDistance);
213 
214  // If the correction is no better than the original string, there is
215  // no need to go further: there is no match.
216  if (lCorrectedString.empty() == true || lCorrectedString == lQueryString) {
217  // DEBUG
218  /*
219  OPENTREP_LOG_DEBUG (" Query string: `"
220  << lQueryString << "' provides no match, "
221  << "and there is no spelling suggestion, "
222  << "even with an edit distance of "
223  << lAllowableEditDistance);
224  */
225 
226  // No match
227  return oDoesMatch;
228  }
229  assert (lCorrectedString.empty() == false
230  && lCorrectedString != lQueryString);
231 
232  // Calculate the effective (Levenshtein) edit distance/error
233  const NbOfErrors_T& lEditDistance =
234  Levenshtein::getDistance (lQueryString, lCorrectedString);
235 
243  const Xapian::Query& lCorrectedXapianQuery =
244  lQueryParser.parse_query (lCorrectedString,
245  Xapian::QueryParser::FLAG_BOOLEAN
246  | Xapian::QueryParser::FLAG_PHRASE
247  | Xapian::QueryParser::FLAG_LOVEHATE);
248 
249  enquire.set_query (lCorrectedXapianQuery);
250  lMatchingSet = enquire.get_mset (0, 20);
251 
252  // Display the results
253  nbMatches = lMatchingSet.size();
254 
255  // DEBUG
256  /*
257  OPENTREP_LOG_DEBUG (" Corrected query string: `" << lCorrectedString
258  << "', i.e.: `"
259  << lCorrectedXapianQuery.get_description()
260  << "' => " << nbMatches << " result(s) found");
261  */
262 
263  if (nbMatches != 0) {
264  // DEBUG
265  /*
266  OPENTREP_LOG_DEBUG (" Query string: `"
267  << lQueryString << "', spelling suggestion: `"
268  << lCorrectedString
269  << "', with a Levenshtein edit distance of "
270  << lEditDistance
271  << " over an allowable edit distance of "
272  << lAllowableEditDistance << ", provides "
273  << nbMatches << " matches.");
274  */
275 
276  //
277  oDoesMatch = true;
278  return oDoesMatch;
279  }
280 
281  // Error
282  OPENTREP_LOG_ERROR (" Query string: `"
283  << lQueryString << "', spelling suggestion: `"
284  << lCorrectedString
285  << "', with a Levenshtein edit distance of "
286  << lEditDistance
287  << " over an allowable edit distance of "
288  << lAllowableEditDistance << ", provides no match, "
289  << "which is not consistent with the existence of "
290  << "the spelling correction.");
291  assert (false);
292 
293  } catch (const Xapian::Error& error) {
294  // Error
295  OPENTREP_LOG_ERROR ("Exception: " << error.get_msg());
296  throw XapianException (error.get_msg());
297  }
298 
299  return oDoesMatch;
300  }
301 
302  // //////////////////////////////////////////////////////////////////////
303  void QuerySlices::init (const OTransliterator& iTransliterator) {
304  // 0. Initialisation
305  // 0.1. Stripping of the punctuation and quotation characters
306  _queryString = iTransliterator.unpunctuate (_queryString);
307  _queryString = iTransliterator.unquote (_queryString);
308 
309  // 0.2. Initialisation of the tokenizer
310  WordList_T lWordList;
312  const unsigned short nbOfWords = lWordList.size();
313 
314  // When the query has a single word, stop here, as there is a single slice
315  if (nbOfWords <= 1) {
316  _slices.push_back (_queryString);
317  return;
318  }
319 
320  // 0.3. Re-create the initial phrase, without any (potential) seperator
321  const std::string lPhrase = createStringFromWordList (lWordList);
322 
323  // 1. Browse the words, two by two, and check whether their association
324  // matches with the Xapian index
325  WordList_T::const_iterator itWord = lWordList.begin();
326  WordList_T::const_iterator itNextWord = lWordList.begin(); ++itNextWord;
327  for (unsigned short idx = 1, idx_rel = 1; itNextWord != lWordList.end();
328  ++itWord, ++itNextWord, ++idx, ++idx_rel) {
329  const std::string& leftWord = *itWord;
330  const std::string& rightWord = *itNextWord;
331 
332  // Store the left word in the staging string
333  if (idx_rel >= 2) {
334  _itLeftWords += " ";
335  }
336  _itLeftWords += leftWord;
337 
338  // Check whether the juxtaposition of the two contiguous words matches
339  const bool lDoesMatch =
340  OPENTREP::doesMatch (_database, leftWord, rightWord);
341 
342  if (lDoesMatch == true) {
343  // When the two words give a match, do nothing now, as at the next turn,
344  // the right word will become the left word and thus be added to the
345  // staging string
346 
347  // DEBUG
348  /*
349  OPENTREP_LOG_DEBUG ("[" << idx << "][" << idx_rel
350  << "] Match - staging string: '"
351  << _itLeftWords << "'");
352  */
353 
354  } else {
355  // DEBUG
356  /*
357  OPENTREP_LOG_DEBUG ("[" << idx << "][" << idx_rel
358  << "] No match - staging string: '"
359  << _itLeftWords << "'");
360  */
361 
362  // When the two words give no match, add the content of the staging
363  // list to the list of slices. Then, empty the staging string.
364  _slices.push_back (_itLeftWords);
365  _itLeftWords = "";
366  idx_rel = 0;
367  }
368  }
369 
370  // 2.
371  const std::string& leftWord = *itWord;
372  if (_itLeftWords.empty() == false) {
373  _itLeftWords += " ";
374  }
375  _itLeftWords += leftWord;
376  _slices.push_back (_itLeftWords);
377 
378  // DEBUG
379  // OPENTREP_LOG_DEBUG ("Last staging string: '" << _itLeftWords << "'");
380  // OPENTREP_LOG_DEBUG ("Slices: " << *this);
381  }
382 
383 }
void toStream(std::ostream &ioOut) const
Definition: QuerySlices.cpp:90
TravelQuery_T _queryString
const NbOfErrors_T K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition: Logger.hpp:24
std::string _itLeftWords
std::string describeKey() const
Definition: QuerySlices.cpp:54
static int getDistance(const std::string &iSource, const std::string &iTarget)
Definition: Levenshtein.cpp:13
void push_back(const StringPartition &iStringPartition)
Definition: QuerySlices.cpp:32
QuerySlices(const Xapian::Database &, const TravelQuery_T &, const OTransliterator &)
Definition: QuerySlices.cpp:20
std::vector< std::string > WordList_T
std::string createStringFromWordList(const WordList_T &iWordList, const NbOfWords_T iSplitIdx, const bool iFromBeginningFlag)
Definition: Utilities.cpp:43
unsigned short NbOfErrors_T
StringPartitionList_T _slices
void tokeniseStringIntoWordList(const std::string &iPhrase, WordList_T &ioWordList)
Definition: Utilities.cpp:19
const Xapian::Database & _database
size_t size() const
Definition: QuerySlices.cpp:39
std::string TravelQuery_T
void fromStream(std::istream &ioIn)
Definition: QuerySlices.cpp:95
bool doesMatch(const Xapian::Database &iDatabase, const std::string &iWord1, const std::string &iWord2)
Helper function to query for a Xapian-based full text match.
std::string describe() const
Definition: QuerySlices.cpp:61
static unsigned int calculateEditDistance(const TravelQuery_T &iPhrase)
Helper function.