| | 127 | |
| | 128 | // |
| | 129 | // This is an implementation of JaroWinkler : http://en.wikipedia.org/wiki/Jaro-Winkler |
| | 130 | // |
| | 131 | // Jaro is computed from two counts: |
| | 132 | // Number of matching characters : characters that appear in both t1 & t2 at index positions that differ by less than (max(t1.length, t2.length)/2) - 1 |
| | 133 | // Number of transposed characters : number of matching characters that are not in the same place in t1 & t2 |
| | 134 | // JaroWinkler is an extension of Jaro that applies an extra coefficient for a common prefix at the start of the strings |
| | 135 | // |
| | 136 | // The code is adapted from : http://www.cppfrance.com/codes/DISTANCE-JARO-WINKLER_49753.aspx |
| | 137 | // |
| | 138 | |
| | 139 | QString TrouverMatches(const QString &txt,int bl[]) |
| | 140 | { |
| | 141 | QString res; |
| | 142 | QChar ctmp='a'; |
| | 143 | |
| | 144 | int i=0; |
| | 145 | while (i<txt.length()) |
| | 146 | { |
| | 147 | ctmp=txt[i]; |
| | 148 | if (bl[i]==true) |
| | 149 | { |
| | 150 | res.append(ctmp); |
| | 151 | } |
| | 152 | i++; |
| | 153 | } |
| | 154 | return res; |
| | 155 | } |
| | 156 | |
| | 157 | double JaroWinkler(const QString &t1,const QString &t2) |
| | 158 | { |
| | 159 | int ecartMax,l1,l2,compteMatching,compteTransposition,longueurPrefix,i,j; |
| | 160 | QString t1Matche,t2Matche; |
| | 161 | double distanceJaro; |
| | 162 | |
| | 163 | l1=t1.length(); |
| | 164 | l2=t2.length(); |
| | 165 | ecartMax=(int)(((double)max(l1,l2))/2.00); |
| | 166 | compteMatching=0; |
| | 167 | |
| | 168 | VERBOSE(VB_XMLTV, QString("input (%1) (%2)").arg(t1).arg(t2)); |
| | 169 | VERBOSE(VB_XMLTV, QString("ecartMax (%1)").arg(ecartMax)); |
| | 170 | |
| | 171 | int b1[l1+2],b2[l2+2]; |
| | 172 | |
| | 173 | for (i=0;i<l1;i++) |
| | 174 | b1[i]=false; |
| | 175 | for (i=0;i<l2;i++) |
| | 176 | b2[i]=false; |
| | 177 | |
| | 178 | for (i=0;i<l1;i++) |
| | 179 | { |
| | 180 | for (j=max(i-ecartMax,0);j<min(i+ecartMax,l2);j++) |
| | 181 | { |
| | 182 | if (t1[i]==t2[j]) |
| | 183 | { |
| | 184 | b1[i]=true; |
| | 185 | b2[i]=true; |
| | 186 | compteMatching++; |
| | 187 | break; |
| | 188 | } |
| | 189 | } |
| | 190 | } |
| | 191 | |
| | 192 | VERBOSE(VB_XMLTV, QString("compteMatching (%1)").arg(compteMatching)); |
| | 193 | if (compteMatching==0) |
| | 194 | return 0.0; |
| | 195 | |
| | 196 | t1Matche=TrouverMatches(t1,b1); |
| | 197 | t2Matche=TrouverMatches(t2,b2); |
| | 198 | |
| | 199 | VERBOSE(VB_XMLTV, QString("t1Matche (%1)").arg(t1Matche)); |
| | 200 | VERBOSE(VB_XMLTV, QString("t2Matche (%1)").arg(t2Matche)); |
| | 201 | |
| | 202 | compteTransposition=0; |
| | 203 | if (strcmp(t1Matche,t2Matche)!=0) |
| | 204 | { |
| | 205 | for (i=0;i<t1Matche.length();i++) |
| | 206 | if (t1Matche[i]!=t2Matche[i]) |
| | 207 | compteTransposition++; |
| | 208 | } |
| | 209 | else |
| | 210 | compteTransposition=0; |
| | 211 | |
| | 212 | |
| | 213 | VERBOSE(VB_XMLTV, QString("compteTransposition (%1)").arg(compteTransposition)); |
| | 214 | |
| | 215 | distanceJaro=(((double)compteMatching/l1)+((double)compteMatching/l2)+((compteMatching-compteTransposition/2.0)/compteMatching))/3.0; |
| | 216 | |
| | 217 | VERBOSE(VB_XMLTV, QString("dist JARO: (%1)").arg(distanceJaro)); |
| | 218 | longueurPrefix=0; |
| | 219 | for (i=0;i<min(3,min(l1,l2))+1;i++) //longueur max : 4 |
| | 220 | { |
| | 221 | if (i<t1.length() && i<t2.length() && t1[i]==t2[i]) |
| | 222 | longueurPrefix++; |
| | 223 | else |
| | 224 | break; |
| | 225 | } |
| | 226 | |
| | 227 | VERBOSE(VB_XMLTV, QString("longueurPrefix (%1)").arg(longueurPrefix)); |
| | 228 | |
| | 229 | return distanceJaro+(longueurPrefix*0.1*(1-distanceJaro)); |
| | 230 | } |
| | 231 | |
| | 232 | |
| | 233 | ChanInfo* _findChanMatch(QList<ChanInfo> *oldchanlist, ChanInfo chan) |
| | 234 | { |
| | 235 | QList<ChanInfo>::iterator i = oldchanlist->begin(); |
| | 236 | QString chanNameUp = chan.name.toUpper(); |
| | 237 | double maxJaro = 0.0; |
| | 238 | ChanInfo *maxJaroChanInfo; |
| | 239 | |
| | 240 | VERBOSE(VB_XMLTV,QString("============================================")); |
| | 241 | VERBOSE(VB_XMLTV,QString("searching for (%1)").arg(chanNameUp)); |
| | 242 | |
| | 243 | for (; i != oldchanlist->end(); i++) |
| | 244 | { |
| | 245 | double d1 = 0; |
| | 246 | double d2 = 0; |
| | 247 | d1 = JaroWinkler(i->name, chanNameUp); |
| | 248 | d2 = JaroWinkler(i->callsign, chanNameUp); |
| | 249 | |
| | 250 | if (d1>0.9 && d2>0.9) |
| | 251 | { |
| | 252 | VERBOSE(VB_XMLTV, |
| | 253 | QString("Looking for a match (%1), (%2), (%3) <==> (%4) = d1 (%5), d2 (%6)") |
| | 254 | .arg((*i).name).arg((*i).callsign).arg((*i).chanstr).arg(chanNameUp).arg(d1).arg(d2)); |
| | 255 | } |
| | 256 | |
| | 257 | // Look to the best match |
| | 258 | if (d1>0.5 && d1>maxJaro) |
| | 259 | { |
| | 260 | maxJaro = d1; |
| | 261 | maxJaroChanInfo = &(*i); |
| | 262 | } |
| | 263 | |
| | 264 | if (d2>0.5 && d2>maxJaro) |
| | 265 | { |
| | 266 | maxJaro = d2; |
| | 267 | maxJaroChanInfo = &(*i); |
| | 268 | } |
| | 269 | } |
| | 270 | |
| | 271 | // only keep match with score > 0.93, under may be a false positive |
| | 272 | if (maxJaro>0.93) |
| | 273 | { |
| | 274 | VERBOSE(VB_XMLTV, |
| | 275 | QString("match is (%1), (%2), (%3) <==> (%4) = d1 (%5)") |
| | 276 | .arg(maxJaroChanInfo->name).arg(maxJaroChanInfo->callsign).arg(maxJaroChanInfo->chanstr).arg(chanNameUp).arg(maxJaro)); |
| | 277 | return maxJaroChanInfo; |
| | 278 | } |
| | 279 | else |
| | 280 | { |
| | 281 | VERBOSE(VB_XMLTV, |
| | 282 | QString("no match")); |
| | 283 | |
| | 284 | return NULL; |
| | 285 | } |
| | 286 | } |
| | 287 | |