% Workshop paper. The workshop name is stored in booktitle (required for
% inproceedings entries); affiliation, details, documenturl, oai-*, pagecount,
% status, submitter and unit are non-standard fields that styles ignore.
@INPROCEEDINGS{EPFL-WORKING-148549,
   abstract    = {As person names are non-unique, the same name on
                 different Web pages might or might not refer to the same
                 real-world person. This entity identification problem is
                 one of the most challenging issues in realizing the
                 Semantic Web or entity-oriented search. We address this
                 disambiguation problem, which is very similar to the
                 entity resolution problem studied in relational
                 databases, however there are also several differences.
                 Most importantly Web pages often only contain partial or
                 incomplete information about the persons, moreover the
                 available information is very heterogeneous, thus we are
                 only able to obtain some uncertain evidence about whether
                 two names refer to the same person using similarity
                 functions. These similarity functions capture some
                 aspects of the similarities between Web-pages, where the
                 names occur, thus they perform very differently for the
                 different names. We analyze some data engineering
                 techniques to cope with the limited accuracy of the
                 similarity functions and to combine multiple functions.
                 Even with our simple techniques we could demonstrate
                 systematic performance improvements and produce
                 comparable results to state-of-the-art methods.},
   address     = {Long Beach, California},
   affiliation = {EPFL},
   author      = {Yerva, Surender Reddy and Mikl{\'o}s, Zolt{\'a}n and Aberer, Karl},
   booktitle   = {1st International Workshop on Data Engineering meets the
                 Semantic Web ({DESWeb'2010}) (co-located with {ICDE'2010})},
   details     = {http://infoscience.epfl.ch/record/148549},
   documenturl = {http://infoscience.epfl.ch/record/148549/files/desweb2010.pdf},
   keywords    = {Entity Resolution; Web; Semantic Web; Machine Learning;
                 okkam; NCCR-MICS/ESDN ; NCCR-MICS},
   oai-id      = {oai:infoscience.epfl.ch:148549},
   oai-set     = {working; fulltext-public; fulltext},
   pagecount   = {6},
   status      = {PUBLISHED},
   submitter   = {169837; 134136},
   title       = {Towards better entity resolution techniques for {Web}
                 document collections},
   unit        = {LSIR},
   year        = 2010
}

