/************************************************* * Exim - an Internet mail transport agent * *************************************************/ /* Copyright (c) University of Cambridge 1995 - 2002 */ /* See the file NOTICE for conditions of use and distribution. */ /* Spamassassin in local_scan by Marc MERLIN - 2002/04/15 Home page: http://marc.merlins.org/linux/exim/sa.html 2002/05/06 - v1.0.1 - Fixed problems with reading Content-Type back from spamc (we used to read whole body too, ooops...) - Handle multi-line Content-Type headers. - All log entries now start with "SA: " - If SA gets run twice, we drop the previous X-Spam headers - Support for X-Spam-Prev-Content-Transfer-Encoding 2002/05/05 - v1.0 *Many* changes. This is now a real program with hopefully most of the configuration options people could need - Fully configurable through external config file - Condition to run or not run SA against a message - 3 Levels of spam handling - Option to save messages that are rejected or cause errors - Very complete error checking 2002/04/17 - v0.9.1 Added a few checks for possible failures 2002/04/16 - v0.9 Pre-release For all this to work correctly, you global spamassassin config should have: # disable web bugs and other potentially dangerous attachements defang_mime 1 # Put report in the headers report_header 1 use_terse_report 1 # Do not rewrite the subject line with "****SPAM****" by default rewrite_subject 0 Note too that this script will not act on any mail before it is flagged as SPAM by SA, but SA can flag mail as spam and you can set a higher threshold for action in local_scan */ #include #include #include #include #include #include #include #include #include #include #include "local_scan.h" /******************************/ /* Compile time config values */ /******************************/ static const char conffile[]="/etc/mail/spamassassin.conf"; static const char spamc[]="/usr/bin/spamc"; /* How much message body you want to feed to SA. By Default SA only processes 250k If you increase this value, you'll have to change the call to spamc and give it -s size */ static const int samaxbody=250*1024; /********************/ /* Code starts here */ /********************/ static const char nospamstatus[]=""; static char *buffera[4096]; static char *buffer=(char *)buffera; static char *where="Error handler called without error string"; static int line=-1; static char *panicerror; #define CHECKERR(mret, mwhere, mline) \ if (mret < 0) \ { \ where=mwhere; \ line=mline; \ goto errexit; \ } \ #define PANIC(merror) \ panicerror=merror; \ goto panicexit; /* Rejected mails can be archived in a spool directory */ /* filename will contain a double / before the filename, I prefer two to none */ int savemail(int readfd, char *filename, int SAmaxarchivebody, int SAEximDebug) { header_line *hl; FILE *readfh; int writefd; int ret; int chunk; if (SAEximDebug) { log_write(0, LOG_MAIN, "SA: Writing suspected spam/problem message to %s", filename); } /* Let's not worry about you receiving two spams at the same second * with the same message ID. If you do, the second one will overwrite * the first one */ writefd=creat(filename, S_IRUSR|S_IWUSR); CHECKERR(writefd, string_sprintf("creat %s", filename),__LINE__); /* First we need to get the header lines from exim, and then we can read the body from writefd */ hl=header_list; while (hl != NULL) { /* type '*' means the header is internal, don't print it */ if (hl->type == '*') { hl=hl->next; continue; } ret=write(writefd,hl->text,strlen(hl->text)); CHECKERR(ret,string_sprintf("header line write in %s", filename),__LINE__); hl=hl->next; } ret=write(writefd,"\n",1); CHECKERR(ret,string_sprintf("header separation write in %s", filename),__LINE__); /* Now copy the body to the save file */ /* we already read from readfd, so we need to reset it */ ret=lseek(readfd, SEEK_SET, 0); CHECKERR(ret, "lseek reset on spooled message", __LINE__); /* This sets us back a bit too far, we need to skip the first line which */ /* contains the message ID on disk */ readfh=fdopen(readfd, "r"); CHECKERR(readfh, "fdopen",__LINE__); fgets((char *)buffer, sizeof(buffera), readfh); chunk=(SAmaxarchivebody / (sizeof(buffera)-1))+1; if (SAEximDebug > 4) { log_write(0, LOG_MAIN, "SA: Body write chunk starts with %d/%d=%d", SAmaxarchivebody , sizeof(buffera), chunk); } while ((ret=fread(buffer, 1, sizeof(buffera),readfh)) > 0 && chunk-- > 0) { if (SAEximDebug > 4) { log_write(0, LOG_MAIN, "SA: Processing body chunk %d", chunk); } ret=write(writefd, buffer, ret); CHECKERR(ret,string_sprintf("body write in %s", filename),__LINE__); } ret=ferror(readfh); CHECKERR(ret, "read body of spooled message for archival", __LINE__); return 0; errexit: return -1; } int local_scan(int fd, uschar **return_text) { header_line *hl; int ret; int pid; int writefd[2]; int readfd[2]; FILE *readfh; char *mesgfn=NULL; char *safemesgid=NULL; static int readconffile=0; static int debugmesg=0; static int SAEximDebug=0; static char *SAspamcpath="/usr/bin/spamc"; static int SAmaxarchivebody=20*1048576; static char *SAerrorsave=NULL; static int SAtemprejectonerror=0; static char *SAEximRunCond="0"; static float SAdevnull=99999.0; static float SApermreject=99999.0; static float SAtempreject=99999.0; static char *SAdevnullsave=NULL; static char *SApermrejectsave=NULL; static char *SAtemprejectsave=NULL; static int SAtemprejectoverwrite=1; int isspam=0; int gotsa=0; int chunk; uschar *expand; float spamvalue=0.0; char *spamstatus=NULL; char *contenttype=NULL; char *contenttransferencoding=NULL; char *xspamstatus=NULL; char *xspamflag=NULL; char *xspamlevel=NULL; /* SA 2.20 or better */ char *xspamcheckerversion=NULL; char *xspamprevcontenttype=NULL; char *xspamprevcontenttransferencoding=NULL; char *xspamreport=NULL; char *mesgid=NULL; /* Any error can write the faulty message to mesgfn, so we need to give it *some* value right now, and improve it as we go along */ mesgfn=string_sprintf("%d", time(NULL)); /* We won't scan local messages. I think exim bypasses local_scan for a * bounce generated after a locally submitted message, but better be safe */ /* This is commented out now because you can control it with SAEximRunCond if (!sender_host_address) { return LOCAL_SCAN_ACCEPT; } */ /* * We keep track of whether we've alrady read the config file, but since * exim spawns itself, it will get read by exim children even though you * didn't restart exim. That said, after you change the config file, you * should restart exim to make sure all the instances pick up the new * config file */ /* Never mind, this causes data corruption because the buffers we get from * exim are overwritten by the time we get called again */ /* if (!readconffile) */ if (1) { ret=open(conffile, 0); CHECKERR(ret,"conf file open",__LINE__); readfh=fdopen(ret, "r"); CHECKERR(readfh,"fdopen",__LINE__); while ((fgets((char *)buffer,sizeof(buffera),readfh)) != NULL) { if (*buffer == '#') { continue; } if (*buffer == '\n' ) { continue; } if (*buffer != 'S' || *(buffer+1) != 'A') { log_write(0, LOG_MAIN, "SA: error while reading configuration file %s. Line does not begin with a SA directive: '%s', ignoring", conffile, buffer); continue; } #define M_CHECKFORVAR(VAR, TYPE) \ if (strstr(buffer, #VAR ## ": ") == buffer) \ { \ if (sscanf(buffer, #VAR ## ": " ## TYPE, & ## VAR)) \ { \ if (SAEximDebug > 3) \ { \ if (SAEximDebug && ! debugmesg) \ { \ log_write(0, LOG_MAIN, "SA: Debug enabled, reading config from file %s", conffile); \ debugmesg=1; \ } \ else \ { \ log_write(0, LOG_MAIN, "SA: config read "## #VAR ##" = "## TYPE, VAR); \ }\ }\ } \ else \ { \ log_write(0, LOG_MAIN, "SA: error while reading configuration file %s. Can't parse value in: '%s', ignoring", conffile, buffer); \ } \ continue; \ } #define M_CHECKFORSTR(VAR) \ if (strstr(buffer, #VAR ## ": ") == buffer) \ { \ VAR = string_copy(buffer+strlen( #VAR )+2); \ \ if (VAR[strlen(VAR)-1] == '\n') \ { \ VAR[strlen(VAR)-1]=0; \ } \ if (SAEximDebug > 3) \ { \ log_write(0, LOG_MAIN, "SA: config read "## #VAR ##" = %s", VAR); \ } \ continue; \ } M_CHECKFORVAR(SAEximDebug, "%d"); M_CHECKFORSTR(SAspamcpath); M_CHECKFORVAR(SAmaxarchivebody, "%d"); M_CHECKFORSTR(SAerrorsave); M_CHECKFORVAR(SAtemprejectonerror, "%d"); M_CHECKFORSTR(SAEximRunCond); M_CHECKFORVAR(SAdevnull, "%f"); M_CHECKFORSTR(SAdevnullsave); M_CHECKFORVAR(SApermreject, "%f"); M_CHECKFORSTR(SApermrejectsave); M_CHECKFORVAR(SAtempreject, "%f"); M_CHECKFORSTR(SAtemprejectsave); M_CHECKFORVAR(SAtemprejectoverwrite, "%d"); } readconffile=1; } expand=expand_string(SAEximRunCond); if (expand == NULL) { PANIC(string_sprintf("SAEximRunCond expansion failure on %s", SAEximRunCond)); } if (SAEximDebug) { log_write(0, LOG_MAIN, "SA: check expand returned: '%s'", expand); } /* Bail from SA if the expansion string says so */ if (strlen(expand) == 0 || !strcmp(expand, "0")) { log_write(0, LOG_MAIN, "SA: check skipped due to expansion to false"); return LOCAL_SCAN_ACCEPT; } if (SAEximDebug) { log_write(0, LOG_MAIN, "SA: check succeeded, running spamc"); } /* Fork off spamc, and get ready to talk to it */ ret=pipe(writefd); ret=pipe(readfd); CHECKERR(ret,"pipe 2",__LINE__); if ((pid=fork()) < 0) { CHECKERR(pid, "fork", __LINE__ - 1); } if (pid == 0) { close(readfd[0]); close(writefd[1]); ret=dup2(writefd[0],0); CHECKERR(ret,"dup2 stdin",__LINE__); ret=dup2(readfd[1],1); CHECKERR(ret,"dup2 stdout",__LINE__); ret=dup2(readfd[1],2); CHECKERR(ret,"dup2 stderr",__LINE__); /* * If you are really smart, you'd implement the spamc protocol and * talk to spamd directly instead of forking spamc, but considering * the overhead spent in spamd, forking off spamc seemed acceptable * rather than re-implementing and tracking the spamc/spamd protocol */ ret=execl(SAspamcpath, "spamc", NULL); CHECKERR(ret,string_sprintf("exec %s", SAspamcpath),__LINE__); } ret=close(readfd[1]); CHECKERR(ret,"close r",__LINE__); ret=close(writefd[0]); CHECKERR(ret,"close w",__LINE__); readfh=fdopen(readfd[0], "r"); /* Ok, we're ready for spewing the mail at spamc */ /* First we need to get the header lines from exim, and then we can read the body from fd */ hl=header_list; while (hl != NULL) { /* type '*' means the header is internal, don't print it */ if (hl->type == '*') { hl=hl->next; continue; } /* Let's not send X-Spam headers, we're generating new ones */ if (strstr((char *)hl->text, "X-Spam-") == NULL) { ret=write(writefd[1],hl->text,strlen(hl->text)); CHECKERR(ret,"header line write",__LINE__); } /* We are replacing the Content-Type header by the one we get from SA */ /* We also drop existing X-Spam headers */ /* Exim is nice and gives us multiline headers as one long line */ if ((strstr((char *)hl->text, "Content-Type: ") == (char *)hl->text) || (strstr((char *)hl->text, "Content-Transfer-Encoding:") == (char *)hl->text) || (strstr((char *)hl->text, "X-Spam-") == (char *)hl->text) ) { if (SAEximDebug> 5) { log_write(0, LOG_MAIN, "SA: while reading headers, marked as deleted: '%s'", (char *)hl->text); } hl->type = '*'; } hl=hl->next; } ret=write(writefd[1],"\n",1); CHECKERR(ret,"header separation write",__LINE__); /* Now read the body and send it to SA */ /* Let's not send more body data than SA is going to process though */ chunk=(samaxbody / sizeof(buffera)); while ((ret=read(fd, buffer, sizeof(buffera))) > 0 && chunk-- > 0) { ret=write(writefd[1], buffer, ret); CHECKERR(ret,"body write",__LINE__); } CHECKERR(ret, "read body", __LINE__ - 4); close(writefd[1]); if (SAEximDebug> 5) { log_write(0, LOG_MAIN, "SA: fed spam to spamc, reading result", expand); } /* Let's see what SA has to tell us about this mail and store the headers */ while ((fgets((char *)buffer,sizeof(buffera),readfh)) != NULL) { if (SAEximDebug> 5) { log_write(0, LOG_MAIN, "SA: spamc read: %s", buffer); } /* Let's handle special multi-line headers first */ if (strstr(buffer, "Content-Type: ") == buffer) { contenttype=string_copy(buffer); /* Content-Type can be multi-line */ while ((fgets((char *)buffer,sizeof(buffera),readfh)) != NULL) { if (buffer[0] != ' ' && buffer[0] != '\t') { if (SAEximDebug> 5) { log_write(0, LOG_MAIN, "SA: spamc read: %s", buffer); } break; } /* Slight waste of memory here, oh well... */ contenttype=string_sprintf("%s%s", contenttype, buffer); } header_add(' ', contenttype); if (SAEximDebug> 5) { log_write(0, LOG_MAIN, "SA: spamc pieced up Content-Type as: '%s'", contenttype); } } if (strstr(buffer, "X-Spam-Prev-Content-Type: ") == buffer) { xspamprevcontenttype=string_copy(buffer); /* X-Spam-Prev-Content-Type can be multi-line */ while ((fgets((char *)buffer,sizeof(buffera),readfh)) != NULL) { if (buffer[0] != ' ' && buffer[0] != '\t') { if (SAEximDebug> 5) { log_write(0, LOG_MAIN, "SA: spamc read: %s", buffer); } break; } /* Slight waste of memory here, oh well... */ xspamprevcontenttype=string_sprintf("%s%s", xspamprevcontenttype, buffer); } header_add(' ', xspamprevcontenttype); if (SAEximDebug> 5) { log_write(0, LOG_MAIN, "SA: spamc pieced up X-Spam-Prev-Content-Type as: '%s'", xspamprevcontenttype); } } /* Ok, now we can do normal processing */ /* If no more headers are there, we're done */ if (!strcmp(buffer, "\n")) { if (SAEximDebug> 5) { log_write(0, LOG_MAIN, "SA: spamc read got newline, end of headers", buffer); } goto exit; } if (strstr(buffer, "Content-Transfer-Encoding: ") == buffer) { contenttransferencoding=string_copy(buffer); header_add(' ', contenttransferencoding); } if (strstr(buffer, "Message-Id: ") == buffer) { char *ptr; mesgid=string_copy(buffer+13); mesgid[strlen(mesgid)-2]=0; /* Update message filename in case it needs to be saved on disk */ safemesgid=string_copy(mesgid); ptr=safemesgid; /* Clean Message-ID to make sure people can't write on our FS */ while (*ptr) { if (*ptr == '/') { *ptr='-'; } ptr++; } if (SAEximDebug> 5) { log_write(0, LOG_MAIN, "SA: Message-Id received and cleaned as: %s", safemesgid); } mesgfn=string_sprintf("%d_%s", time(NULL), safemesgid); continue; } if (strstr(buffer, "X-Spam-") != buffer) { continue; } if (strstr(buffer, "X-Spam-Status: ") == buffer) { char *start; char *end; gotsa=1; xspamstatus=string_copy(buffer); header_add(' ', xspamstatus); start=strstr(xspamstatus, "hits="); end=strstr(xspamstatus, " tests="); if (start!=NULL && end!=NULL) { spamstatus=string_copyn(start, end-start); } else { log_write(0, LOG_REJECT, "SA: could not parse X-Spam-Status: to extract hits and required. Bad!. Got: '%s'", xspamstatus); } start=strstr(spamstatus, "="); end=strstr(spamstatus, " "); if (start!=NULL && end!=NULL) { start++; sscanf(start, "%f", &spamvalue); } else { log_write(0, LOG_REJECT, "SA: spam value extract failed in '%s'. Bad!", xspamstatus); } continue; } if (strstr(buffer, "X-Spam-Flag: ") == buffer) { xspamflag=string_copy(buffer); header_add(' ', xspamflag); if (buffer[13] == 'Y') { isspam=1; } continue; } if (strstr(buffer, "X-Spam-Level: ") == buffer) { xspamlevel=string_copy(buffer); header_add(' ', xspamlevel); continue; } if (strstr(buffer, "X-Spam-Checker-Version: ") == buffer) { xspamcheckerversion=string_copy(buffer); header_add(' ', xspamcheckerversion); continue; } if (strstr(buffer, "X-Spam-Prev-Content-Transfer-Encoding: ") == buffer) { xspamprevcontenttransferencoding=string_copy(buffer); header_add(' ', xspamprevcontenttransferencoding); continue; } if (strstr(buffer, "X-Spam-Report: ") == buffer) { xspamreport=string_copy(buffer); while ((fgets((char *)buffer,sizeof(buffera),readfh)) != NULL) { if (buffer[0] != '\n') { if (strstr(buffer, " ") != buffer) { PANIC(string_sprintf("Unexpected string while parsing X-Spam-Report %s", buffer)); } /* Slight waste of memory here, oh well... */ xspamreport=string_sprintf("%s%s", xspamreport, buffer); } else { header_add(' ', xspamreport); ungetc('\n', readfh); break; } } } } exit: fclose(readfh); wait(&ret); if (ret) { sprintf(buffer, "%d", ret); PANIC(string_sprintf("wait on spamc child yielded, %s", buffer)); } if (gotsa == 0) { log_write(0, LOG_MAIN, "SA: SA didn't successfully run against message"); return LOCAL_SCAN_ACCEPT; } if (spamstatus == NULL) { spamstatus = (char *) nospamstatus; } if (isspam) { if (spamvalue > SAdevnull) { if (SAdevnullsave) { ret=savemail(fd, string_sprintf("%s/%s", SAdevnullsave, mesgfn), SAmaxarchivebody, SAEximDebug); CHECKERR(ret,where,line); } recipients_count=0; spamstatus=string_sprintf("%s trigger=%.1f", spamstatus, SAdevnull); log_write(0, LOG_REJECT, "SA: local_scan silently tossed message: %s", spamstatus); /* As of exim 4.04, you can't return a custom ok message :( */ *return_text=string_sprintf("Message is spam (score: %s)\nand is being fed to the bit bucket\n(please don't feed it too much, it might get fat)", spamstatus); return LOCAL_SCAN_ACCEPT; } else if (spamvalue > SApermreject) { if (SApermrejectsave) { ret=savemail(fd, string_sprintf("%s/%s", SApermrejectsave, mesgfn), SAmaxarchivebody, SAEximDebug); CHECKERR(ret,where,line); } spamstatus=string_sprintf("%s trigger=%.1f", spamstatus, SApermreject); *return_text=string_sprintf("Heuristics guessed that this message was spam:\n%s", spamstatus); return LOCAL_SCAN_REJECT; } else if (spamvalue > SAtempreject) { if (SAtemprejectsave) { /* By default, we'll only save temp bounces by message ID so * that when the same message is submitted several times, we * overwrite the same file on disk and not create a brand new * one every single time */ if (SAtemprejectoverwrite && safemesgid) { ret=savemail(fd, string_sprintf("%s/%s", SAtemprejectsave, safemesgid), SAmaxarchivebody, SAEximDebug); CHECKERR(ret,where,line); } else { ret=savemail(fd, string_sprintf("%s/%s", SAtemprejectsave, mesgfn), SAmaxarchivebody, SAEximDebug); CHECKERR(ret,where,line); } } spamstatus=string_sprintf("%s trigger=%.1f", spamstatus, SAtempreject); *return_text=string_sprintf("Heuristics guessed that this message was spam:\n%s, so it is temporarilty rejected.\nAdmins may whitelist this and it may be accepted when you resubmit it.", spamstatus); return LOCAL_SCAN_TEMPREJECT; } else { /* As of exim 4.04, you can't return a custom ok message :( */ *return_text=string_sprintf("Message accepted, heuristics found a spam likelyhood score of: %s", spamstatus); return LOCAL_SCAN_ACCEPT; } } else { log_write(0, LOG_MAIN, "SA: score %s", spamstatus); return LOCAL_SCAN_ACCEPT; } errexit: if (SAerrorsave) { ret=savemail(fd, string_sprintf("%s/%s", SAerrorsave, mesgfn), 1073741824, SAEximDebug); if (ret < 0) { log_write(0, LOG_MAIN, "SA: Error in error handler while trying to save mail to %s, file "__FILE__", line %d: %s", string_sprintf("%s/%s", SAerrorsave, mesgfn), __LINE__ - 3, strerror(errno)); } } if (SAtemprejectonerror) { *return_text=string_sprintf("Temporary local error while processing message, please contact postmaster"); /* Unfortunately, this spits up the message twice, once here and once by * the return TEMPREJECT */ log_write(0, LOG_REJECT, "SA: Unexpected error on %s, file "__FILE__", line %d: %s", where, line-1, strerror(errno)); return LOCAL_SCAN_TEMPREJECT; } else { log_write(0, LOG_MAIN, "SA: Unexpected error on %s (but message was accepted), file "__FILE__", line %d: %s", where, line-1, strerror(errno)); return LOCAL_SCAN_ACCEPT; } panicexit: if (SAerrorsave) { ret=savemail(fd, string_sprintf("%s/%s", SAerrorsave, mesgfn), 1073741824, SAEximDebug); if (ret < 0) { log_write(0, LOG_MAIN, "SA: Error in error handler while trying to save mail to %s, file "__FILE__", line %d: %s", string_sprintf("%s/%s", SAerrorsave, mesgfn), __LINE__ - 3, strerror(errno)); } } if (SAtemprejectonerror) { *return_text=string_sprintf("Temporary local error while processing message, please contact postmaster"); log_write(0, LOG_REJECT, "SA: %s", panicerror); return LOCAL_SCAN_TEMPREJECT; } else { log_write(0, LOG_MAIN, "SA: %s (but message was accepted)", panicerror); return LOCAL_SCAN_ACCEPT; } } /* End of local_scan.c */ /* To ask Philip: 1) read/use return_text on 2xx 2) optional log_write REJECT without triggering a full dump of the rejected headers 3) I need to return '\n' in return_text, but it gets logged, and log_write isn't supposed to get newlines... */ /* TODO: Handle header lines in mixed case Do my own memory handling so that I can store config options across calls (using the exim functions causes memory corruption since exim reuses the memory) */