Implement OSM.spam_score to return a spam score for a piece of text
This commit is contained in:
parent
d75c99b62d
commit
d80f260b8b
1 changed files with 20 additions and 0 deletions
20
lib/osm.rb
20
lib/osm.rb
|
@@ -7,6 +7,7 @@ module OSM
|
|||
require 'xml/libxml'
|
||||
require 'digest/md5'
|
||||
require 'RMagick'
|
||||
require 'nokogiri'
|
||||
|
||||
# The base class for API Errors.
|
||||
class APIError < RuntimeError
|
||||
|
@@ -498,5 +499,24 @@ module OSM
|
|||
return "#{tilesql} AND #{prefix}latitude BETWEEN #{minlat} AND #{maxlat} AND #{prefix}longitude BETWEEN #{minlon} AND #{maxlon}"
|
||||
end
|
||||
|
||||
# Return a spam score for a chunk of text
|
||||
# Scores +text+ (parsed as HTML via Nokogiri::HTML) for link spam.
#
# The score is the sum of two parts:
# * 20 points for every <a> element found in the document, and
# * 200 points per unit by which the share of the document's text
#   that sits inside <a> elements exceeds 0.2 (nothing is added when
#   the share is 0.2 or less).
#
# When the parsed document has no text content at all the links are
# not examined and the score is 0.0.
#
# Always returns a Float.
def self.spam_score(text)
  doc = Nokogiri::HTML(text)
  text_length = doc.content.length

  # Nothing to measure against: skip the link scan entirely.
  return 0.0 if text_length.zero?

  anchors = doc.xpath("//a")
  anchor_text_length = anchors.inject(0) { |total, anchor| total + anchor.content.length }

  # Fraction of the document's text that is link text.
  link_proportion = anchor_text_length.to_f / text_length

  # Only the portion of the link share above the 0.2 allowance is penalised.
  excess_proportion = [link_proportion - 0.2, 0.0].max

  excess_proportion * 200 + anchors.length * 20
end
|
||||
end
|
||||
|
|
Loading…
Add table
Reference in a new issue