# Source: www.pudn.com > heritrix-1.14.0-src.rar > BaseRule.options
# Available URL Canonicalization Rules.
# Lines beginning with # and empty lines are ignored.
org.archive.crawler.url.canonicalize.LowercaseRule
org.archive.crawler.url.canonicalize.StripUserinfoRule
org.archive.crawler.url.canonicalize.StripWWWRule
org.archive.crawler.url.canonicalize.StripWWWNRule
org.archive.crawler.url.canonicalize.StripSessionCFIDs
org.archive.crawler.url.canonicalize.StripSessionIDs
org.archive.crawler.url.canonicalize.RegexRule
org.archive.crawler.url.canonicalize.FixupQueryStr
org.archive.crawler.url.canonicalize.StripExtraSlashes