Implement OSM.spam_score to return a spam score for a piece of text

This commit is contained in:
Tom Hughes 2010-04-29 00:29:49 +01:00
parent d75c99b62d
commit d80f260b8b

View file

@ -7,6 +7,7 @@ module OSM
require 'xml/libxml'
require 'digest/md5'
require 'RMagick'
require 'nokogiri'
# The base class for API Errors.
class APIError < RuntimeError
@ -498,5 +499,24 @@ module OSM
return "#{tilesql} AND #{prefix}latitude BETWEEN #{minlat} AND #{maxlat} AND #{prefix}longitude BETWEEN #{minlon} AND #{maxlon}"
end
# Return a spam score for a chunk of text
#
# The text is parsed as HTML with Nokogiri and scored from its <a> elements:
#
# * 20 points for every link found, and
# * 200 points for each unit by which the proportion of the document's
#   text that sits inside links exceeds 0.2 (nothing is added at or
#   below that threshold).
#
# text:: the HTML (or plain text) to score
#
# Returns the score as a Float; 0.0 means no links were found.
def self.spam_score(text)
  doc = Nokogiri::HTML(text)
  content_length = doc.content.length

  # Links are counted whether or not the document has any text content,
  # so that a document made only of text-less links still scores.
  links = doc.xpath("//a")
  link_count = links.length
  link_size = links.inject(0) { |total, link| total + link.content.length }

  # Only the division needs guarding against an empty document.
  link_proportion = content_length > 0 ? link_size.to_f / content_length : 0.0

  [link_proportion - 0.2, 0.0].max * 200 + link_count * 20
end
end