/************************************************* * Exim - an Internet mail transport agent * *************************************************/ /* Copyright (c) University of Cambridge 1995 - 2002 */ /* See the file NOTICE for conditions of use and distribution. */ /* Spamassassin in local_scan by Marc MERLIN - 2002/04/15 Home page: http://marc.merlins.org/linux/exim/sa.html 2002/04/16 - v0.9 Pre-release 2002/04/17 - v0.9.1 Added a few checks for possible failures 2002/05/05 - v1.0 *Many* changes. This is now a really program with hopefully most of the configuration options people could need - Fully ocnfigurable through external config file - Condition to run or not run SA against a message - 3 Levels of spam handling - Option to save messages that are rejected or cause errors - Very complete error checking For all this to work correctly, you global spamassassin config should have: # disable web bugs and other potentially dangerous attachements defang_mime 1 # Put report in the headers report_header 1 use_terse_report 1 # Do not rewrite the subject line with "****SPAM****" by default rewrite_subject 0 Note too that this script will not act on any mail before it is flagged as SPAM by SA, but SA can flag mail as spam and you can set a higher threshold for action in local_scan */ #include #include #include #include #include #include #include #include #include #include #include "local_scan.h" /******************************/ /* Compile time config values */ /******************************/ static const char conffile[]="/etc/mail/spamassassin.conf"; static const char spamc[]="/usr/bin/spamc"; /* How much message body you want to feed to SA. By Default SA only processes 250k If you increase this value, you'll have to change the call to spamc and give it -s size */ static const int samaxbody=250*1024; /********************/ /* Code starts here */ /********************/ static const char nospamstatus[]=""; static char *buffera[4096]; static char *buffer=(char *)buffera; static char *reportbuffera[4096]; static char *reportbuffer=(char *)reportbuffera; static char *where="Error handler called without error string"; static int line=-1; static char *panicerror; #define CHECKERR(mret, mwhere, mline) \ if (mret < 0) \ { \ where=mwhere; \ line=mline; \ goto errexit; \ } \ #define PANIC(merror) \ panicerror=merror; \ goto panicexit; /* Rejected mails can be archived in a spool directory */ /* filename will contain a double / before the filename, I prefer two to none */ int savemail(int readfd, char *filename, int SAmaxarchivebody, int SAEximDebug) { header_line *hl; FILE *readfh; int writefd; int ret; int chunk; if (SAEximDebug) { log_write(0, LOG_MAIN, "Writing suspected spam/problem message to %s", filename); } /* Let's not worry about you receiving two spams at the same second * with the same message ID. If you do, the second one will overwrite * the first one */ writefd=creat(filename, S_IRUSR|S_IWUSR); CHECKERR(writefd, string_sprintf("creat %s", filename),__LINE__); /* First we need to get the header lines from exim, and then we can read the body from writefd */ hl=header_list; while (hl != NULL) { /* type '*' means the header is internal, don't print it */ if (hl->type == '*') { hl=hl->next; continue; } ret=write(writefd,hl->text,strlen(hl->text)); CHECKERR(ret,string_sprintf("header line write in %s", filename),__LINE__); hl=hl->next; } ret=write(writefd,"\n",1); CHECKERR(ret,string_sprintf("header separation write in %s", filename),__LINE__); /* Now copy the body to the save file */ /* we already read from readfd, so we need to reset it */ ret=lseek(readfd, SEEK_SET, 0); CHECKERR(ret, "lseek reset on spooled message", __LINE__); /* This sets us back a bit too far, we need to skip the first line which */ /* contains the message ID on disk */ readfh=fdopen(readfd, "r"); CHECKERR(readfh, "fdopen",__LINE__); fgets((char *)buffer, sizeof(buffera), readfh); chunk=(SAmaxarchivebody / (sizeof(buffera)-1))+1; if (SAEximDebug > 4) { log_write(0, LOG_MAIN, "Body write chunk starts with %d/%d=%d", SAmaxarchivebody , sizeof(buffera), chunk); } while ((ret=fread(buffer, 1, sizeof(buffera),readfh)) > 0 && chunk-- > 0) { if (SAEximDebug > 4) { log_write(0, LOG_MAIN, "Processing body chunk %d", chunk); } ret=write(writefd, buffer, ret); CHECKERR(ret,string_sprintf("body write in %s", filename),__LINE__); } ret=ferror(readfh); CHECKERR(ret, "read body of spooled message for archival", __LINE__); return 0; errexit: return -1; } int local_scan(int fd, uschar **return_text) { header_line *hl; int ret; int pid; int writefd[2]; int readfd[2]; FILE *readfh; char *mesgfn=NULL; char *safemesgid=NULL; static int readconffile=0; static int debugmesg=0; static int SAEximDebug=0; static char *SAspamcpath="/usr/bin/spamc"; static int SAmaxarchivebody=20*1048576; static char *SAerrorsave=NULL; static int SAtemprejectonerror=0; static char *SAEximRunCond="0"; static float SAdevnull=99999.0; static float SApermreject=99999.0; static float SAtempreject=99999.0; static char *SAdevnullsave=NULL; static char *SApermrejectsave=NULL; static char *SAtemprejectsave=NULL; static int SAtemprejectoverwrite=1; int isspam=0; int gotsa=0; int chunk; uschar *expand; float spamvalue=0.0; char *spamstatus=NULL; char *contenttype=NULL; char *xspamstatus=NULL; char *xspamflag=NULL; char *xspamlevel=NULL; /* SA 2.20 or better */ char *xspamcheckerversion=NULL; char *xspamprevcontenttype=NULL; char *xspamreport=NULL; char *mesgid=NULL; /* Any error can write the faulty message to mesgfn, so we need to give it *some* value right now, and improve it as we go along */ mesgfn=string_sprintf("%d", time(NULL)); /* We won't scan local messages. I think exim bypasses local_scan for a * bounce generated after a locally submitted message, but better be safe */ /* This is commented out now because you can control it with SAEximRunCond if (!sender_host_address) { return LOCAL_SCAN_ACCEPT; } */ /* * We keep track of whether we've alrady read the config file, but since * exim spawns itself, it will get read by exim children even though you * didn't restart exim. That said, after you change the config file, you * should restart exim to make sure all the instances pick up the new * config file */ /* Never mind, this causes data corruption because the buffers we get from * exim are overwritten by the time we get called again */ /* if (!readconffile) */ if (1) { ret=open(conffile, 0); CHECKERR(ret,"conf file open",__LINE__); readfh=fdopen(ret, "r"); CHECKERR(readfh,"fdopen",__LINE__); while ((fgets((char *)buffer,sizeof(buffera),readfh)) != NULL) { if (*buffer == '#') { continue; } if (*buffer == '\n' ) { continue; } if (*buffer != 'S' || *(buffer+1) != 'A') { log_write(0, LOG_MAIN, "SA error while reading configuration file %s. Line does not begin with a SA directive: '%s', ignoring", conffile, buffer); continue; } #define M_CHECKFORVAR(VAR, TYPE) \ if (strstr(buffer, #VAR ## ": ") == buffer) \ { \ if (sscanf(buffer, #VAR ## ": " ## TYPE, & ## VAR)) \ { \ if (SAEximDebug > 3) \ { \ if (SAEximDebug && ! debugmesg) \ { \ log_write(0, LOG_MAIN, "SpamAssassin Debug enabled, reading config from file %s", conffile); \ debugmesg=1; \ } \ else \ { \ log_write(0, LOG_MAIN, "SA Config: read "## #VAR ##" = "## TYPE, VAR); \ }\ }\ } \ else \ { \ log_write(0, LOG_MAIN, "SA error while reading configuration file %s. Can't parse value in: '%s', ignoring", conffile, buffer); \ } \ continue; \ } #define M_CHECKFORSTR(VAR) \ if (strstr(buffer, #VAR ## ": ") == buffer) \ { \ VAR = string_copy(buffer+strlen( #VAR )+2); \ \ if (VAR[strlen(VAR)-1] == '\n') \ { \ VAR[strlen(VAR)-1]=0; \ } \ if (SAEximDebug > 3) \ { \ log_write(0, LOG_MAIN, "SA config: read "## #VAR ##" = %s", VAR); \ } \ continue; \ } M_CHECKFORVAR(SAEximDebug, "%d"); M_CHECKFORSTR(SAspamcpath); M_CHECKFORVAR(SAmaxarchivebody, "%d"); M_CHECKFORSTR(SAerrorsave); M_CHECKFORVAR(SAtemprejectonerror, "%d"); M_CHECKFORSTR(SAEximRunCond); M_CHECKFORVAR(SAdevnull, "%f"); M_CHECKFORSTR(SAdevnullsave); M_CHECKFORVAR(SApermreject, "%f"); M_CHECKFORSTR(SApermrejectsave); M_CHECKFORVAR(SAtempreject, "%f"); M_CHECKFORSTR(SAtemprejectsave); M_CHECKFORVAR(SAtemprejectoverwrite, "%d"); } readconffile=1; } expand=expand_string(SAEximRunCond); if (expand == NULL) { PANIC(string_sprintf("SAEximRunCond expansion failure on %s", SAEximRunCond)); } if (SAEximDebug) { log_write(0, LOG_MAIN, "SA check expand returned: '%s'", expand); } /* Bail from SA if the expansion string says so */ if (strlen(expand) == 0 || !strcmp(expand, "0")) { log_write(0, LOG_MAIN, "SA check skipped due to expansion to false"); return LOCAL_SCAN_ACCEPT; } if (SAEximDebug) { log_write(0, LOG_MAIN, "SA check succeeded, running spamc"); } /* Fork off spamc, and get ready to talk to it */ ret=pipe(writefd); ret=pipe(readfd); CHECKERR(ret,"pipe 2",__LINE__); if ((pid=fork()) < 0) { CHECKERR(pid, "fork", __LINE__ - 1); } if (pid == 0) { close(readfd[0]); close(writefd[1]); ret=dup2(writefd[0],0); CHECKERR(ret,"dup2 stdin",__LINE__); ret=dup2(readfd[1],1); CHECKERR(ret,"dup2 stdout",__LINE__); ret=dup2(readfd[1],2); CHECKERR(ret,"dup2 stderr",__LINE__); /* * If you are really smart, you'd implement the spamc protocol and * talk to spamd directly instead of forking spamc, but considering * the overhead spent in spamd, forking off spamc seemed acceptable * rather than re-implementing and tracking the spamc/spamd protocol */ ret=execl(SAspamcpath, "spamc", NULL); CHECKERR(ret,string_sprintf("exec %s", SAspamcpath),__LINE__); } ret=close(readfd[1]); CHECKERR(ret,"close r",__LINE__); ret=close(writefd[0]); CHECKERR(ret,"close w",__LINE__); readfh=fdopen(readfd[0], "r"); /* Ok, we're ready for spewing the mail at spamc */ /* First we need to get the header lines from exim, and then we can read the body from fd */ hl=header_list; while (hl != NULL) { /* type '*' means the header is internal, don't print it */ if (hl->type == '*') { hl=hl->next; continue; } ret=write(writefd[1],hl->text,strlen(hl->text)); CHECKERR(ret,"header line write",__LINE__); /* We are replacing the Content-Type header by the one we get from SA */ /* We also drop existing X-Spam headers */ if ((strstr((char *)hl->text, "Content-Type: ") == (char *)hl->text) || (strstr((char *)hl->text, "X-Spam-") == (char *)hl->text) ) { hl->type = '*'; } hl=hl->next; } ret=write(writefd[1],"\n",1); CHECKERR(ret,"header separation write",__LINE__); /* Now read the body and send it to SA */ /* Let's not send more body data than SA is going to process though */ chunk=(samaxbody / sizeof(buffera)); while ((ret=read(fd, buffer, sizeof(buffera))) > 0 && chunk-- > 0) { ret=write(writefd[1], buffer, ret); CHECKERR(ret,"body write",__LINE__); } CHECKERR(ret, "read body", __LINE__ - 4); close(writefd[1]); /* Let's see what SA has to tell us about this mail and store the headers */ while ((fgets((char *)buffer,sizeof(buffera),readfh)) != NULL) { /* No more headers, body follows */ if (!strcmp(buffer, "\n")) { if (gotsa == 0) { goto exit; } } if (strstr(buffer, "Content-Type: ") == buffer) { contenttype=string_copy(buffer); header_add(' ', contenttype); continue; } if (strstr(buffer, "Message-Id: ") == buffer) { char *ptr; mesgid=string_copy(buffer+13); mesgid[strlen(mesgid)-2]=0; /* Update message filename in case it needs to be saved on disk */ safemesgid=string_copy(mesgid); ptr=safemesgid; /* Clean Message-ID to make sure people can't write on our FS */ while (*ptr) { if (*ptr == '/') { *ptr='-'; } ptr++; } mesgfn=string_sprintf("%d_%s", time(NULL), safemesgid); continue; } if (strstr(buffer, "X-Spam-") != buffer) { continue; } if (strstr(buffer, "X-Spam-Status: ") == buffer) { char *start; char *end; gotsa=1; xspamstatus=string_copy(buffer); header_add(' ', xspamstatus); start=strstr(xspamstatus, "hits="); end=strstr(xspamstatus, " tests="); if (start!=NULL && end!=NULL) { spamstatus=string_copyn(start, end-start); } else { log_write(0, LOG_REJECT, "local_scan SA: Could not parse X-Spam-Status: to extract hits and required. Bad!. Got: '%s'", xspamstatus); } start=strstr(spamstatus, "="); end=strstr(spamstatus, " "); if (start!=NULL && end!=NULL) { start++; sscanf(start, "%f", &spamvalue); } else { log_write(0, LOG_REJECT, "local_scan SA: Spam value extract failed in '%s'. Bad!", xspamstatus); } continue; } if (strstr(buffer, "X-Spam-Flag: ") == buffer) { xspamflag=string_copy(buffer); header_add(' ', xspamflag); if (buffer[13] == 'Y') { isspam=1; } continue; } if (strstr(buffer, "X-Spam-Level: ") == buffer) { xspamlevel=string_copy(buffer); header_add(' ', xspamlevel); continue; } if (strstr(buffer, "X-Spam-Checker-Version: ") == buffer) { xspamcheckerversion=string_copy(buffer); header_add(' ', xspamcheckerversion); continue; } if (strstr(buffer, "X-Spam-Prev-Content-Type: ") == buffer) { xspamprevcontenttype=string_copy(buffer); header_add(' ', xspamprevcontenttype); continue; } if (strstr(buffer, "X-Spam-Report: ") == buffer) { const int rbufsize=sizeof(reportbuffera); strncpy(reportbuffer, buffer, rbufsize); reportbuffer[rbufsize-1]=0; while ((fgets((char *)buffer,sizeof(buffera),readfh)) != NULL) { if (strcmp(buffer, "\n")) { if (strstr(buffer, " ") != buffer) { PANIC(string_sprintf("Unexpected string while parsing X-Spam-Report %s", buffer)); } strncat(reportbuffer,buffer,rbufsize-strlen(reportbuffer)); reportbuffer[rbufsize-1]=0; } else { xspamreport=string_copy(reportbuffer); header_add(' ', xspamreport); break; } } } } exit: fclose(readfh); wait(&ret); if (ret) { sprintf(buffer, "%d", ret); PANIC(string_sprintf("wait on spamc child yielded, %s", buffer)); } if (gotsa == 0) { log_write(0, LOG_MAIN, "SA wasn't successfully run against message"); return LOCAL_SCAN_ACCEPT; } if (spamstatus == NULL) { spamstatus = (char *) nospamstatus; } if (isspam) { if (spamvalue > SAdevnull) { if (SAdevnullsave) { ret=savemail(fd, string_sprintf("%s/%s", SAdevnullsave, mesgfn), SAmaxarchivebody, SAEximDebug); CHECKERR(ret,where,line); } recipients_count=0; spamstatus=string_sprintf("%s trigger=%.1f", spamstatus, SAdevnull); log_write(0, LOG_REJECT, "local_scan SA silently tossed message: %s", spamstatus); /* As of exim 4.04, you can't return a custom ok message :( */ *return_text=string_sprintf("Message is spam (score: %s)\nand is being fed to the bit bucket\n(please don't feed it too much, it might get fat)", spamstatus); return LOCAL_SCAN_ACCEPT; } else if (spamvalue > SApermreject) { if (SApermrejectsave) { ret=savemail(fd, string_sprintf("%s/%s", SApermrejectsave, mesgfn), SAmaxarchivebody, SAEximDebug); CHECKERR(ret,where,line); } spamstatus=string_sprintf("%s trigger=%.1f", spamstatus, SApermreject); *return_text=string_sprintf("Heuristics guessed that this message was spam:\n%s", spamstatus); return LOCAL_SCAN_REJECT; } else if (spamvalue > SAtempreject) { if (SAtemprejectsave) { /* By default, we'll only save temp bounces by message ID so * that when the same message is submitted several times, we * overwrite the same file on disk and not create a brand new * one every single time */ if (SAtemprejectoverwrite && safemesgid) { ret=savemail(fd, string_sprintf("%s/%s", SAtemprejectsave, safemesgid), SAmaxarchivebody, SAEximDebug); CHECKERR(ret,where,line); } else { ret=savemail(fd, string_sprintf("%s/%s", SAtemprejectsave, mesgfn), SAmaxarchivebody, SAEximDebug); CHECKERR(ret,where,line); } } spamstatus=string_sprintf("%s trigger=%.1f", spamstatus, SAtempreject); *return_text=string_sprintf("Heuristics guessed that this message was spam:\n%s, so it is temporarilty rejected.\nAdmins may whitelist this and it may be accepted when you resubmit it.", spamstatus); return LOCAL_SCAN_TEMPREJECT; } else { /* As of exim 4.04, you can't return a custom ok message :( */ *return_text=string_sprintf("Message accepted, heuristics found a spam likelyhood score of: %s", spamstatus); return LOCAL_SCAN_ACCEPT; } } else { log_write(0, LOG_MAIN, "SA score: %s", spamstatus); return LOCAL_SCAN_ACCEPT; } errexit: if (SAerrorsave) { ret=savemail(fd, string_sprintf("%s/%s", SAerrorsave, mesgfn), 1073741824, SAEximDebug); if (ret < 0) { log_write(0, LOG_MAIN, "local_scan SA: Error in error handler while trying to save mail to %s, file "__FILE__", line %d: %s", string_sprintf("%s/%s", SAerrorsave, mesgfn), __LINE__ - 3, strerror(errno)); } } if (SAtemprejectonerror) { *return_text=string_sprintf("Temporary local error while processing message, please contact postmaster"); /* Unfortunately, this spits up the message twice, once here and once by * the return TEMPREJECT */ log_write(0, LOG_REJECT, "local_scan SA: Unexpected error on %s, file "__FILE__", line %d: %s", where, line-1, strerror(errno)); return LOCAL_SCAN_TEMPREJECT; } else { log_write(0, LOG_MAIN, "local_scan SA: Unexpected error on %s (but message was accepted), file "__FILE__", line %d: %s", where, line-1, strerror(errno)); return LOCAL_SCAN_ACCEPT; } panicexit: if (SAerrorsave) { ret=savemail(fd, string_sprintf("%s/%s", SAerrorsave, mesgfn), 1073741824, SAEximDebug); if (ret < 0) { log_write(0, LOG_MAIN, "local_scan SA: Error in error handler while trying to save mail to %s, file "__FILE__", line %d: %s", string_sprintf("%s/%s", SAerrorsave, mesgfn), __LINE__ - 3, strerror(errno)); } } if (SAtemprejectonerror) { *return_text=string_sprintf("Temporary local error while processing message, please contact postmaster"); log_write(0, LOG_REJECT, "local_scan SA: %s", panicerror); return LOCAL_SCAN_TEMPREJECT; } else { log_write(0, LOG_MAIN, "local_scan SA: %s (but message was accepted)", panicerror); return LOCAL_SCAN_ACCEPT; } } /* End of local_scan.c */ /* To ask Philip: 1) read/use return_text on 2xx 2) optional log_write REJECT without triggering a full dump of the rejected headers 3) I need to return '\n' in return_text, but it gets logged, and log_write isn't supposed to get newlines... */ /* TODO: Some condition seems to generate multiple Content-Type headers 2002-05-06 01:46:08 174e8E-0004XW-00 rejected by local_scan(): Heuristics guessed that this message was spam:\nhits=13.6 required=7.0 trigger=12.0 Envelope-from: <> Envelope-to: P Received: from mail.vasoftware.com ([198.186.202.175]:48793) by mail2.merlins.org with esmtp (Cipher TLSv1:DES-CBC3-SHA:168) (Exim 4.04 #69 (Debian)) id 174e8E-0004XW-00 for ; Mon, 06 May 2002 01:46:06 -0700 P Received: from mail by mail.vasoftware.com with local (Exim 3.31-VA-mm2 #1 (Debian)) id 174e8D-0000Ha-00 for ; Mon, 06 May 2002 01:46:05 -0700 X-Failed-Recipients: marc@merlins.org F From: Mail Delivery System T To: me Subject: Mail delivery failed: returning message to sender I Message-Id: Date: Mon, 06 May 2002 01:46:05 -0700 X-Spam-Status: Yes, hits=13.6 required=7.0 tests=DEAR_SOMEBODY,DEAR_FRIEND,REMOVE_SUBJ,REPLY_REMOVE_SUBJECT,SUBJ_REMOVE,CLICK_BELOW,EXCUSE_6,BIG_FONT,CLICK_HERE_LINK,WEB_BUGS version=2.20 X-Spam-Flag: YES X-Spam-Level: ************* X-Spam-Checker-Version: SpamAssassin 2.20 (devel $Id: SpamAssassin.pm,v 1.77 2002/04/06 19:28:30 hughescr Exp $) X-Spam-Report: 13.6 hits, 7 required; * -0.5 -- BODY: Contains 'Dear Somebody' * 2.1 -- BODY: How dear can you be if you don't know my name? * 2.3 -- BODY: List removal information * 0.3 -- BODY: List removal information * 0.1 -- BODY: List removal information * 1.5 -- BODY: Asks you to click below * -0.1 -- BODY: Claims you can be removed from the list * 2.1 -- BODY: FONT Size +2 and up or 3 and up * 1.8 -- BODY: Tells you to click on a URL * 4.0 -- BODY: Image tag with an ID code to identify you Content-Type: multipart/alternative; boundary="PVODXW" Content-Type: text/plain; charset=us-ascii Content-Type: text/html; charset=us-ascii */