Add a configurable mailbox type (MH or Maildir) to scbayes.

  * scbayes.in: new accessor (mailbox-type) reads :mailbox-type from
    *config-params*, defaulting to 'mh.  Its value is passed to
    collect-words and test-spamness-in-folder.  main now calls
    (read-config) before parsing the command-line options.
  * scmail/bayesian-filter.scm: collect-words and test-spamness-in-folder
    take a new mailbox-type argument.  For 'maildir they list the "cur"
    subdirectory of the folder and accept entries matching #/^\d+/; for
    any other value they keep the previous behaviour (entries of the
    folder itself matching #/^\d+$/).

NOTE: the line breaks and indentation of this patch were reconstructed
from the hunk-header line counts.  Whitespace in context lines may differ
from the target files, so apply from the top of the source tree with
whitespace-insensitive matching, e.g.:  patch -p0 -l < this-file

--- scbayes.in~	2003-06-17 18:02:59.000000000 +0900
+++ scbayes.in	2003-11-16 00:57:52.000000000 +0900
@@ -44,6 +44,9 @@
 (define (mailbox-dir)
   (expand-path (get-keyword :mailbox *config-params* "~/Mail")))
 
+(define (mailbox-type)
+  (get-keyword :mailbox-type *config-params* 'mh))
+
 (define (folder-dir folder)
   (build-path (mailbox-dir) folder))
 
@@ -74,7 +77,7 @@
   (check-folder-dir folder)
   (with-token-table (table-file)
                     :write
-                    (cut collect-words (folder-dir folder) table-type)))
+                    (cut collect-words (folder-dir folder) (mailbox-type) table-type)))
 
 (define (check-mail path)
   (unless (file-is-readable? path)
@@ -87,7 +90,7 @@
   (check-folder-dir folder)
   (with-token-table (table-file)
                     :read
-                    (cut test-spamness-in-folder (folder-dir folder) spam?)))
+                    (cut test-spamness-in-folder (folder-dir folder) (mailbox-type) spam?)))
 
 (define (table-stat)
   (with-token-table (table-file) :read (cut simple-stat)))
@@ -99,6 +102,7 @@
   (print "done."))
 
 (define (main args)
+  (read-config)
   (parse-options (cdr args)
     (("learn-nonspam=s" (folder)
       (set! command (lambda () (learn folder nonspam-table))))
--- scmail/bayesian-filter.scm~	2003-06-17 16:56:50.000000000 +0900
+++ scmail/bayesian-filter.scm	2003-11-16 01:02:59.000000000 +0900
@@ -179,18 +179,25 @@
 (define word-probability-limit 0.001)
 
 ;; Count tokens from emails in the folder.
-;; (collect-words "~/Mail/spam" spam-table)
-;; (collect-words "~/Mail/inbox" nonspam-table)
-(define (collect-words directory table-getter)
+;; (collect-words "~/Mail/spam" 'mh spam-table)
+;; (collect-words "~/Mail/inbox" 'mh nonspam-table)
+;; (collect-words "~/Maildir/.spam" 'maildir spam-table)
+;; (collect-words "~/Maildir" 'maildir nonspam-table)
+(define (collect-words directory mailbox-type table-getter)
   (for-each (lambda (email)
               (receive (tokens lang) (tokenize-email email)
                 (unless (null? tokens)
                   (let1 tab (table-getter lang)
                     (for-each (cut add-token tab <>) tokens)
                     (inc! (message-count tab))))))
-            (directory-list (expand-path directory)
-                            :children? #t :add-path? #t
-                            :filter #/^\d+$/)))
+            (cond ((eq? mailbox-type 'maildir)
+                   (directory-list (expand-path (build-path directory "cur"))
+                                   :children? #t :add-path? #t
+                                   :filter #/^\d+/))
+                  (else
+                   (directory-list (expand-path directory)
+                                   :children? #t :add-path? #t
+                                   :filter #/^\d+$/)))))
 
 ;; Calculate a word's spam probability.
 ;; Cf. Paul Graham, "A Plan for Spam" and "Better Bayesian Filtering"
@@ -518,9 +525,11 @@
     (for-each (lambda (w) (print #`" ,(car w) : ,(cdr w)"))
               words)))
 
-;; (test-spamness-in-folder "~/Mail/inbox" #f)
-;; (test-spamness-in-folder "~/Mail/spam" #t)
-(define (test-spamness-in-folder directory expect-spam?)
+;; (test-spamness-in-folder "~/Mail/inbox" 'mh #f)
+;; (test-spamness-in-folder "~/Mail/spam" 'mh #t)
+;; (test-spamness-in-folder "~/Maildir" 'maildir #f)
+;; (test-spamness-in-folder "~/Maildir/.spam" 'maildir #t)
+(define (test-spamness-in-folder directory mailbox-type expect-spam?)
   (let ((threshold 0.9)
         (count 0)
         (bad '()))
@@ -532,9 +541,14 @@
                             (and (not expect-spam?)
                                  (>= prob threshold)))
                     (push! bad (list email prob words)))))
-              (directory-list (expand-path directory)
-                              :children? #t :add-path? #t
-                              :filter #/^\d+$/))
+              (cond ((eq? mailbox-type 'maildir)
+                     (directory-list (expand-path (build-path directory "cur"))
+                                     :children? #t :add-path? #t
+                                     :filter #/^\d+/))
+                    (else
+                     (directory-list (expand-path directory)
+                                     :children? #t :add-path? #t
+                                     :filter #/^\d+$/))))
     (print #`"Out of ,count messages in ,directory")
     (print #`" ,(length bad) messages are identified incorrectly:")
     (for-each (lambda (entry)