444 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
			
		
		
	
	
			444 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
| local fun = require "fun"
 | |
| local rspamd_logger = require "rspamd_logger"
 | |
| local util = require "rspamd_util"
 | |
| local lua_util = require "lua_util"
 | |
| local rspamd_regexp = require "rspamd_regexp"
 | |
| local ucl = require "ucl"
 | |
| 
 | |
-- Shared converter state, filled while the input files are parsed:
--   complicated: lines that are kept verbatim for the remaining SA config
--   rules:       converted rules, keyed by rule type
--   scores:      symbol name -> score taken from `score` lines
local complicated, rules, scores = {}, {}, {}
 | |
| 
 | |
--- Join the words that follow the first `start` entries with single spaces.
-- Used to rebuild the regexp/description part of a rule line after it has been
-- split on whitespace.
-- @param words  array of strings
-- @param start  number of leading words to drop
-- @return the remaining words joined by " " ("" when nothing remains)
local function words_to_re(words, start)
  -- table.concat returns "" on its own when start + 1 is past the last element
  return table.concat(words, " ", start + 1)
end
 | |
| 
 | |
--- Collect every match of a Lua pattern in `str`.
-- Despite its name, `delim` is not a separator: it is the pattern that a
-- token must match (e.g. '[^|]+' yields the pieces between '|' characters).
-- @param str    string to tokenize
-- @param delim  optional token pattern; defaults to runs of non-whitespace
-- @return array of matched tokens, in order of appearance
local function split(str, delim)
  local token_pattern = delim or '[^%s]+'
  local tokens = {}

  for piece in string.gmatch(str, token_pattern) do
    tokens[#tokens + 1] = piece
  end

  return tokens
end
 | |
| 
 | |
--- Parse the header part of a `header` rule (e.g. `From:addr|To`) and record
-- the result in `cur_rule`.
--
-- The header spec is split on '|' into individual headers; each header may be
-- followed by ':'-separated modifiers (`addr`, `name`, `raw`, `case`).
-- When at least one header was found, two fields are written to `cur_rule`:
--   cur_rule.header   - array of per-header tables
--                       { header = <name>, strong = <bool>, raw = <bool>,
--                         ['function'] = <optional extractor> }
--   cur_rule.ordinary - true only for a single plain header without modifiers
--                       (not ALL, MESSAGEID or ToCc)
--
-- @param hline     header specification taken from the rule line
-- @param cur_rule  rule table that is being built; modified in place
local function handle_header_def(hline, cur_rule)
  -- Build an extractor returning the given field ('addr' or 'name') of every
  -- address that util.parse_addr finds in a header value.
  local function addr_field_extractor(field)
    return function(str)
      local addr_parsed = util.parse_addr(str)
      local ret = {}
      if addr_parsed then
        for _, elt in ipairs(addr_parsed) do
          if elt[field] then
            table.insert(ret, elt[field])
          end
        end
      end

      return ret
    end
  end

  -- Now check for modifiers inside header's name
  local hdrs = split(hline, '[^|]+')
  local hdr_params = {}
  -- Check if an re is an ordinary re
  local ordinary = true

  for _, h in ipairs(hdrs) do
    if h == 'ALL' or h == 'ALL:raw' then
      ordinary = false
    else
      local args = split(h, '[^:]+')
      -- A fresh table per header: sharing one table between iterations would
      -- make all entries of hdr_params alias the last header processed
      local cur_param = {
        strong = false,
        raw = false,
        header = args[1],
      }

      if args[2] then
        -- We have some ops that are required for the header, so it's not ordinary
        ordinary = false
      end

      for i = 2, #args do
        local func = args[i]
        if func == 'addr' or func == 'name' then
          cur_param['function'] = addr_field_extractor(func)
        elseif func == 'raw' then
          cur_param['raw'] = true
        elseif func == 'case' then
          cur_param['strong'] = true
        else
          rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
            func, cur_rule['symbol'])
        end
      end

      -- Some header rules require splitting to check of multiple headers
      if cur_param['header'] == 'MESSAGEID' or cur_param['header'] == 'ToCc' then
        -- Special cases for spamassassin
        ordinary = false
      else
        table.insert(hdr_params, cur_param)
      end
    end
  end

  -- Leave cur_rule untouched when the specification contained no header at all
  if #hdrs > 0 then
    cur_rule['ordinary'] = ordinary and (not (#hdr_params > 1))
    cur_rule['header'] = hdr_params
  end
end
 | |
| 
 | |
--- Parse one SpamAssassin configuration file and sort its lines into the
-- file-level `rules`, `scores` and `complicated` tables.
--
-- Simple `header`, `body`, `rawbody`, `full` and `uri` regexp rules are stored
-- in `rules` (together with a following `describe` line); `score` lines are
-- stored in `scores`; every other non-comment line (conditional blocks, `meta`
-- rules, rules that cannot be converted, ...) is appended to `complicated`
-- after trimming and `=~`/`!~` spacing normalisation.
--
-- NOTE(review): `score` lines are always consumed here, also for symbols whose
-- rule ends up in `complicated`; confirm that this is intended.
--
-- @param f  object providing `f:lines()`, e.g. an open file handle
local function process_sa_conf(f)
  local cur_rule = {}
  local valid_rule = false
  local skip_to_endif = false
  local if_nested = 0

  -- Keep a line verbatim for the remaining SA configuration.
  local function keep(l)
    table.insert(complicated, l)
  end

  -- Store the rule under construction in `rules` and start a new one.
  local function insert_cur_rule()
    local rule = cur_rule
    -- Always start from a clean state, even when the rule is rejected below
    cur_rule = {}
    valid_rule = false

    if not rule['symbol'] or not rule.type then
      rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', rule)
      return
    end

    local hdr_name
    if rule.type == 'header' then
      local first = rule.header and rule.header[1]
      hdr_name = first and first.header
      if not hdr_name then
        rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', rule)
        return
      end
    end

    -- Buckets are created only after validation so that rejected rules do not
    -- leave empty entries behind
    local target = rules[rule.type]
    if not target then
      target = {}
      rules[rule.type] = target
    end

    if hdr_name then
      -- Header rules are grouped by header name
      if not target[hdr_name] then
        target[hdr_name] = {}
      end
      target = target[hdr_name]
    end

    target[rule['symbol']] = rule
  end

  -- Store the pending rule, if there is a complete one.
  local function flush_rule()
    if valid_rule then
      insert_cur_rule()
    end
  end

  -- Extract the score from the words of a `score` line.
  local function parse_score(words)
    if #words == 3 then
      -- score rule <x>
      return tonumber(words[3])
    elseif #words == 6 then
      -- score rule <x1> <x2> <x3> <x4>
      -- we assume here that bayes and network are enabled and select <x4>
      return tonumber(words[6])
    else
      rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
    end

    return 0
  end

  -- Track if/else/endif directives. Returns true when the line has been
  -- consumed (it is then already stored in `complicated`).
  local function handle_conditional(l)
    -- Unbalanced if/endif
    if if_nested < 0 then if_nested = 0 end

    if skip_to_endif then
      if string.match(l, '^endif') then
        if_nested = if_nested - 1

        if if_nested == 0 then
          skip_to_endif = false
        end
      elseif string.match(l, '^if') then
        if_nested = if_nested + 1
      elseif string.match(l, '^else') then
        -- Else counterpart for if
        -- NOTE(review): this ends skipping regardless of the nesting depth
        skip_to_endif = false
      end
      keep(l)
      return true
    end

    if string.match(l, '^if') then
      -- ifplugin, `if !plugin(` and unknown ifs are all handled the same way
      skip_to_endif = true
      if_nested = if_nested + 1
    elseif string.match(l, '^else') then
      -- Else counterpart for if
      skip_to_endif = true
    elseif string.match(l, '^endif') then
      if_nested = if_nested - 1
    else
      return false
    end

    -- The directive is stored exactly once; it must not reach the generic
    -- rule handling, which would store it a second time
    keep(l)
    return true
  end

  -- header SYMBOL Header =~ /regexp/
  local function start_header_rule(words, l)
    flush_rule()

    local op = words[4]
    if op ~= '=~' and op ~= '!~' then
      keep(l)
      return
    end

    cur_rule['type'] = 'header'
    cur_rule['symbol'] = words[2]

    if op == '!~' then
      keep(l)
      return
    end

    cur_rule['re_expr'] = words_to_re(words, 4)
    if string.find(cur_rule['re_expr'], '%s+%[if%-unset:') then
      keep(l)
      return
    end

    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
    if not cur_rule['re'] then
      rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
        cur_rule['re_expr'], cur_rule['symbol'])
      keep(l)
      return
    end

    handle_header_def(words[3], cur_rule)
    if not cur_rule['ordinary'] then
      keep(l)
      return
    end

    valid_rule = true
  end

  -- body|rawbody|full SYMBOL /regexp/
  local function start_content_rule(words, l, rule_type, raw)
    flush_rule()

    cur_rule['symbol'] = words[2]

    local first = words[3] and string.sub(words[3], 1, 1)
    if first ~= '/' and first ~= 'm' then
      -- might be function
      keep(l)
      return
    end

    cur_rule['type'] = rule_type
    cur_rule['re_expr'] = words_to_re(words, 2)
    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
    if raw then
      cur_rule['raw'] = true
    end

    if cur_rule['re'] then
      valid_rule = true
    else
      -- Keep the rule in the SA configuration instead of silently losing it
      rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
        cur_rule['re_expr'], cur_rule['symbol'])
      keep(l)
    end
  end

  -- uri SYMBOL /regexp/
  local function start_uri_rule(words, l)
    flush_rule()

    cur_rule['type'] = 'uri'
    cur_rule['symbol'] = words[2]
    cur_rule['re_expr'] = words_to_re(words, 2)
    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])

    if cur_rule['re'] and cur_rule['symbol'] then
      valid_rule = true
    else
      keep(l)
    end
  end

  -- Handle a single raw line of the file.
  local function process_line(l)
    l = lua_util.rspamd_str_trim(l)
    -- Replace bla=~/re/ with bla =~ /re/ (#2372)
    l = l:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3')

    if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then
      return
    end

    if handle_conditional(l) then
      return
    end

    -- Skip comments: everything from the first word starting with '#' on
    local words = {}
    for _, w in ipairs(split(l)) do
      if string.sub(w, 1, 1) == '#' then
        break
      end
      words[#words + 1] = w
    end

    local kind = words[1]

    if kind == "header" then
      start_header_rule(words, l)
    elseif kind == "body" then
      start_content_rule(words, l, 'sabody', false)
    elseif kind == "rawbody" then
      start_content_rule(words, l, 'sarawbody', false)
    elseif kind == "full" then
      start_content_rule(words, l, 'message', true)
    elseif kind == "uri" then
      start_uri_rule(words, l)
    elseif kind == "meta" then
      -- meta SYMBOL expression
      flush_rule()
      keep(l)
    elseif kind == "describe" and valid_rule then
      cur_rule['description'] = words_to_re(words, 2)
    elseif kind == "score" and words[2] then
      scores[words[2]] = parse_score(words)
    else
      -- Anything else, including a `score` line without a symbol
      keep(l)
    end
  end

  for l in f:lines() do
    process_line(l)
  end

  flush_rule()
end
 | |
| 
 | |
-- Parse every file named on the command line; files that cannot be opened are
-- reported and skipped.
for _, matched in ipairs(arg) do
  local f = io.open(matched, "r")
  if f then
    rspamd_logger.messagex(rspamd_config, 'loading SA rules from %s', matched)
    process_sa_conf(f)
    -- Release the handle as soon as the file has been consumed
    f:close()
  else
    rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
  end
end
 | |
| 
 | |
-- Generated multimap configuration: lower-cased map name -> map definition
local multimap_conf = {}

--- Write the regexp map file for one group of converted rules and register
-- the matching multimap definition in `multimap_conf`.
--
-- Each rule becomes one line `/<regexp>/ <symbol>:<score>` in the map file;
-- the score is taken from the file-level `scores` table (0 when unknown).
--
-- @param what  rule type: 'sabody', 'sarawbody', 'full' (or its alias
--              'message'), 'uri' or 'header'
-- @param syms  table symbol -> rule; the rule's `re` field is written out
-- @param hdr   header name, required when `what` is 'header'
-- Raises an error when the map file cannot be opened for writing.
local function handle_rule(what, syms, hdr)
  -- Rules of the SA `full` kind may be stored under the type name 'message';
  -- treat it as an alias so that those rules are not rejected as unknown
  if what == 'message' then
    what = 'full'
  end

  -- Map file, multimap type and filter of the types that need no header
  local static_types = {
    sabody = { fname = 'body_re.map', mtype = 'content', filter = 'oneline' },
    sarawbody = { fname = 'raw_body_re.map', mtype = 'content', filter = 'rawtext' },
    full = { fname = 'full_re.map', mtype = 'content', filter = 'full' },
    uri = { fname = 'uri_re.map', mtype = 'url', filter = 'full' },
  }

  local mtype
  local filter
  local fname
  local header
  local sym = what:upper()

  local static = static_types[what]
  if static then
    fname = static.fname
    mtype = static.mtype
    filter = static.filter
  elseif what == 'header' then
    fname = ('hdr_' .. hdr .. '_re.map'):lower()
    mtype = 'header'
    header = hdr
    sym = sym .. '_' .. hdr:upper()
  else
    rspamd_logger.errx('unknown type: %s', what)
    return
  end

  local conf = {
    type = mtype,
    filter = filter,
    symbol = 'SA_MAP_AUTO_' .. sym,
    regexp = true,
    map = fname,
    header = header,
    symbols = {}
  }

  local re_file, open_err = io.open(fname, 'w')
  if not re_file then
    -- Abort: continuing would drop these rules from both outputs
    error(string.format('cannot open %s for writing: %s', fname, tostring(open_err)))
  end

  for k, r in pairs(syms) do
    local score = scores[k] or 0.0
    re_file:write(string.format('/%s/ %s:%f\n', tostring(r.re), k, score))
    table.insert(conf.symbols, k)
  end

  re_file:close()

  multimap_conf[sym:lower()] = conf
  rspamd_logger.messagex('stored %s regexp in %s', sym:lower(), fname)
end
 | |
| 
 | |
-- Emit one map per rule type; header rules are stored one level deeper
-- (grouped by header name) and get one map per header.
for rule_type, bucket in pairs(rules) do
  if rule_type ~= 'header' then
    handle_rule(rule_type, bucket)
  else
    for hdr_name, hdr_rules in pairs(bucket) do
      handle_rule(rule_type, hdr_rules, hdr_name)
    end
  end
end
 | |
| 
 | |
-- Serialize the collected multimap definitions as UCL into auto_multimap.conf
local out = ucl.to_format(multimap_conf, 'ucl')
local mmap_conf, mmap_err = io.open('auto_multimap.conf', 'w')
if not mmap_conf then
  -- Fail with a readable message instead of indexing a nil handle
  error(string.format('cannot open %s for writing: %s', 'auto_multimap.conf',
    tostring(mmap_err)))
end
mmap_conf:write(out)
mmap_conf:close()
rspamd_logger.messagex('stored multimap conf in %s', 'auto_multimap.conf')
 | |
| 
 | |
-- Write every line that was not converted to auto_sa.conf, one per line,
-- leaving out lines that consist of whitespace only
local sa_remain, sa_err = io.open('auto_sa.conf', 'w')
if not sa_remain then
  -- Fail with a readable message instead of indexing a nil handle
  error(string.format('cannot open %s for writing: %s', 'auto_sa.conf',
    tostring(sa_err)))
end
for _, l in ipairs(complicated) do
  if not string.match(l, '^%s+$') then
    sa_remain:write(l)
    sa_remain:write('\n')
  end
end
sa_remain:close()
rspamd_logger.messagex('stored sa remains conf in %s', 'auto_sa.conf')
 |