| | 127 | |
//
// This is an implementation of Jaro-Winkler: http://en.wikipedia.org/wiki/Jaro-Winkler
//
// Jaro is based on a formula that uses:
//   Number of matching characters: characters present in both t1 & t2 whose index positions differ by no more than (max(t1.length, t2.length) / 2) - 1
//   Number of transposed characters: number of matching characters that are not in the same order in t1 & t2
// Jaro-Winkler is an extension of Jaro that applies a bonus coefficient for a common prefix at the start of the strings
//
// The code is transposed from: http://www.cppfrance.com/codes/DISTANCE-JARO-WINKLER_49753.aspx
//
| | 138 | |
| | 139 | QString FindMatches(const QString &txt,int bl[]) |
| | 140 | { |
| | 141 | QString res; |
| | 142 | QChar ctmp = 'a'; |
| | 143 | |
| | 144 | int i = 0; |
| | 145 | while (i < txt.length()) |
| | 146 | { |
| | 147 | ctmp = txt[i]; |
| | 148 | if (bl[i] == true) |
| | 149 | { |
| | 150 | res.append(ctmp); |
| | 151 | } |
| | 152 | i++; |
| | 153 | } |
| | 154 | return res; |
| | 155 | } |
| | 156 | |
| | 157 | double JaroWinkler(const QString &t1,const QString &t2) |
| | 158 | { |
| | 159 | int maxSpace,l1,l2, matchingCount , transposalCount , prefixLength , i, j; |
| | 160 | QString t1Matche,t2Matche; |
| | 161 | double jaroLength; |
| | 162 | |
| | 163 | l1 = t1.length(); |
| | 164 | l2 = t2.length(); |
| | 165 | maxSpace = (int)(((double)max(l1,l2)) / 2.00); |
| | 166 | matchingCount = 0; |
| | 167 | |
| | 168 | VERBOSE(VB_XMLTV, QString("Input (%1) (%2)").arg(t1).arg(t2)); |
| | 169 | VERBOSE(VB_XMLTV, QString("matchingCount (%1)").arg(maxSpace)); |
| | 170 | |
| | 171 | int b1[l1+2],b2[l2+2]; |
| | 172 | |
| | 173 | for (i = 0; i < l1; i++) |
| | 174 | b1[i] = false; |
| | 175 | |
| | 176 | for (i = 0;i < l2; i++) |
| | 177 | b2[i] = false; |
| | 178 | |
| | 179 | for (i = 0; i <l1; i++) |
| | 180 | { |
| | 181 | for (j = max(i-maxSpace,0); j < min(i+maxSpace,l2); j++) |
| | 182 | { |
| | 183 | if (t1[i] == t2[j]) |
| | 184 | { |
| | 185 | b1[i] = true; |
| | 186 | b2[i] = true; |
| | 187 | matchingCount++; |
| | 188 | break; |
| | 189 | } |
| | 190 | } |
| | 191 | } |
| | 192 | |
| | 193 | VERBOSE(VB_XMLTV, QString("matchingCount (%1)").arg(matchingCount)); |
| | 194 | |
| | 195 | if (matchingCount == 0) |
| | 196 | return 0.0; |
| | 197 | |
| | 198 | t1Matche = FindMatches(t1,b1); |
| | 199 | t2Matche = FindMatches(t2,b2); |
| | 200 | |
| | 201 | VERBOSE(VB_XMLTV, QString("t1Matche (%1)").arg(t1Matche)); |
| | 202 | VERBOSE(VB_XMLTV, QString("t2Matche (%1)").arg(t2Matche)); |
| | 203 | |
| | 204 | transposalCount = 0; |
| | 205 | if (strcmp(t1Matche,t2Matche) != 0) |
| | 206 | { |
| | 207 | for (i = 0; i < t1Matche.length(); i++) |
| | 208 | if (t1Matche[i] != t2Matche[i]) |
| | 209 | transposalCount++; |
| | 210 | } |
| | 211 | else |
| | 212 | transposalCount = 0; |
| | 213 | |
| | 214 | |
| | 215 | VERBOSE(VB_XMLTV, QString("transposalCount (%1)").arg(transposalCount)); |
| | 216 | |
| | 217 | jaroLength = (((double)matchingCount/l1) + |
| | 218 | ((double)matchingCount/l2) + |
| | 219 | ((matchingCount - transposalCount / 2.0) / matchingCount)) / 3.0; |
| | 220 | |
| | 221 | VERBOSE(VB_XMLTV, QString("JARO Length: (%1)").arg(jaroLength)); |
| | 222 | |
| | 223 | prefixLength = 0; |
| | 224 | for (i = 0; i < min(3, min(l1,l2)) + 1; i++) //longueur max : 4 |
| | 225 | { |
| | 226 | if ( i < t1.length() && |
| | 227 | i < t2.length() && |
| | 228 | t1[i] == t2[i]) |
| | 229 | prefixLength++; |
| | 230 | else |
| | 231 | break; |
| | 232 | } |
| | 233 | |
| | 234 | VERBOSE(VB_XMLTV, QString("prefixLength (%1)").arg(prefixLength)); |
| | 235 | |
| | 236 | return jaroLength + ( prefixLength * 0.1 * (1 - jaroLength)); |
| | 237 | } |
| | 238 | |
| | 239 | |
| | 240 | ChanInfo* _findChanMatch(QList<ChanInfo> *oldchanlist, ChanInfo chan) |
| | 241 | { |
| | 242 | QList<ChanInfo>::iterator i = oldchanlist->begin(); |
| | 243 | QString channelNameUpper = chan.name.toUpper(); |
| | 244 | double jaroMax = 0.0; |
| | 245 | ChanInfo *jaroMaxChanInfo; |
| | 246 | |
| | 247 | VERBOSE(VB_XMLTV,QString("============================================")); |
| | 248 | VERBOSE(VB_XMLTV,QString("searching for (%1)").arg(channelNameUpper)); |
| | 249 | |
| | 250 | for (; i != oldchanlist->end(); i++) |
| | 251 | { |
| | 252 | double d1 = 0; |
| | 253 | double d2 = 0; |
| | 254 | d1 = JaroWinkler(i->name, channelNameUpper); |
| | 255 | d2 = JaroWinkler(i->callsign, channelNameUpper); |
| | 256 | |
| | 257 | if (d1 > 0.9 && d2 > 0.9) |
| | 258 | { |
| | 259 | VERBOSE(VB_XMLTV, |
| | 260 | QString("Looking for a match (%1), (%2), (%3) <==> (%4) = d1 (%5), d2 (%6)") |
| | 261 | .arg((*i).name).arg((*i).callsign).arg((*i).chanstr).arg(channelNameUpper).arg(d1).arg(d2)); |
| | 262 | } |
| | 263 | |
| | 264 | // Look to the best match |
| | 265 | if (d1 > 0.5 && d1 > jaroMax) |
| | 266 | { |
| | 267 | jaroMax = d1; |
| | 268 | jaroMaxChanInfo = &(*i); |
| | 269 | } |
| | 270 | |
| | 271 | if (d2 > 0.5 && d2 > jaroMax) |
| | 272 | { |
| | 273 | jaroMax = d2; |
| | 274 | jaroMaxChanInfo = &(*i); |
| | 275 | } |
| | 276 | } |
| | 277 | |
| | 278 | // only keep match with score > 0.93, under may be a false positive |
| | 279 | if (jaroMax > 0.93) |
| | 280 | { |
| | 281 | VERBOSE(VB_XMLTV, |
| | 282 | QString("match is (%1), (%2), (%3) <==> (%4) = d1 (%5)") |
| | 283 | .arg(jaroMaxChanInfo->name).arg(jaroMaxChanInfo->callsign).arg(jaroMaxChanInfo->chanstr).arg(channelNameUpper).arg(jaroMax)); |
| | 284 | return jaroMaxChanInfo; |
| | 285 | } |
| | 286 | else |
| | 287 | { |
| | 288 | VERBOSE(VB_XMLTV, QString("no match")); |
| | 289 | return NULL; |
| | 290 | } |
| | 291 | } |
| | 292 | |