# Source: www.pudn.com > heritrix-1.14.0-src.rar > Processor.options


# Available processors
# Each processor class should be listed by its fully qualified class name
# (package included), followed by a '|' and a descriptive name
# (containing only [a-zA-Z]).
# Lines beginning with '#' and empty lines are ignored.
org.archive.crawler.prefetch.Preselector|Preselector
org.archive.crawler.prefetch.PreconditionEnforcer|Preprocessor
org.archive.crawler.fetcher.FetchDNS|DNS
org.archive.crawler.fetcher.FetchHTTP|HTTP
org.archive.crawler.fetcher.FetchFTP|FTP
org.archive.crawler.extractor.ExtractorHTTP|ExtractorHTTP
org.archive.crawler.extractor.ExtractorHTML|ExtractorHTML
org.archive.crawler.extractor.AggressiveExtractorHTML|AggressiveExtractorHTML
org.archive.crawler.extractor.ExtractorCSS|ExtractorCSS
org.archive.crawler.extractor.ExtractorSWF|ExtractorSWF
org.archive.crawler.extractor.ExtractorJS|ExtractorJS
org.archive.crawler.extractor.ExtractorPDF|ExtractorPDF
org.archive.crawler.extractor.ExtractorDOC|ExtractorDOC
org.archive.crawler.extractor.ExtractorXML|ExtractorXML
org.archive.crawler.extractor.ExtractorUniversal|ExtractorUniversal
org.archive.crawler.extractor.ExtractorURI|ExtractorURI
org.archive.crawler.extractor.ExtractorImpliedURI|ExtractorImpliedURI
org.archive.crawler.extractor.ChangeEvaluator|ChangeEvaluator
org.archive.crawler.extractor.HTTPContentDigest|HTTPContentDigest
org.archive.crawler.writer.ARCWriterProcessor|Archiver
org.archive.crawler.writer.WARCWriterProcessor|WARCArchiver
org.archive.crawler.writer.Kw3WriterProcessor|Kw3Archiver
org.archive.crawler.writer.MirrorWriterProcessor|MirrorWriter
org.archive.crawler.postprocessor.CrawlStateUpdater|Updater
org.archive.crawler.postprocessor.LinksScoper|LinksScoper
org.archive.crawler.postprocessor.SupplementaryLinksScoper|SupplementaryLinksScoper
org.archive.crawler.postprocessor.FrontierScheduler|FrontierScheduler
org.archive.crawler.postprocessor.LowDiskPauseProcessor|LowDiskPause
org.archive.crawler.postprocessor.WaitEvaluator|WaitEvaluator
org.archive.crawler.postprocessor.ContentBasedWaitEvaluator|ContentBasedWaitEvaluator
org.archive.crawler.postprocessor.TextWaitEvaluator|TextWaitEvaluator
org.archive.crawler.postprocessor.ImageWaitEvaluator|ImageWaitEvaluator
org.archive.crawler.postprocessor.AcceptRevisitProcessor|AcceptRevisitProcessor
org.archive.crawler.postprocessor.RejectRevisitProcessor|RejectRevisitProcessor
org.archive.crawler.processor.LexicalCrawlMapper|LexicalCrawlMapper
org.archive.crawler.processor.HashCrawlMapper|HashCrawlMapper
org.archive.crawler.processor.BeanShellProcessor|BeanShellProcessor
org.archive.crawler.prefetch.QuotaEnforcer|QuotaEnforcer
org.archive.crawler.prefetch.RuntimeLimitEnforcer|RuntimeLimitEnforcer
org.archive.crawler.extractor.JerichoExtractorHTML|JerichoExtractorHTML
org.archive.crawler.processor.recrawl.PersistStoreProcessor|PersistStoreProcessor
org.archive.crawler.processor.recrawl.PersistLogProcessor|PersistLogProcessor
org.archive.crawler.processor.recrawl.PersistLoadProcessor|PersistLoadProcessor
org.archive.crawler.processor.recrawl.FetchHistoryProcessor|FetchHistoryProcessor
org.archive.crawler.extractor.TrapSuppressExtractor|TrapSuppressExtractor