-- Lua source file: 444 lines, 12 KiB
local fun = require "fun"
 | 
						|
local rspamd_logger = require "rspamd_logger"
 | 
						|
local util = require "rspamd_util"
 | 
						|
local lua_util = require "lua_util"
 | 
						|
local rspamd_regexp = require "rspamd_regexp"
 | 
						|
local ucl = require "ucl"
 | 
						|
 | 
						|
-- Shared conversion state, filled in while the SpamAssassin files are parsed:
--   complicated: lines that are not converted; written verbatim to auto_sa.conf
--   rules:       converted rules, indexed as rules[type][symbol]
--                (header rules use rules['header'][header_name][symbol])
--   scores:      symbol name -> numeric score taken from `score` lines
local complicated, rules, scores = {}, {}, {}
 | 
						|
 | 
						|
--- Re-join the tail of a tokenised line with single spaces.
-- @param words sequence of tokens
-- @param start number of leading tokens to skip
-- @return the tokens after the first `start` ones joined by " "
--         (an empty string when nothing is left)
local function words_to_re(words, start)
  local first_kept = start + 1
  return table.concat(words, " ", first_kept)
end
 | 
						|
 | 
						|
--- Collect every match of a Lua pattern found in a string.
-- Note that `delim` is the pattern describing a *token*, not the separator
-- between tokens; e.g. '[^|]+' yields the pieces between '|' characters.
-- @param str string to scan
-- @param delim token pattern; defaults to runs of non-whitespace characters
-- @return sequence of matched tokens, in order of appearance
local function split(str, delim)
  local token_pattern = delim or '[^%s]+'
  local tokens = {}

  for piece in string.gmatch(str, token_pattern) do
    tokens[#tokens + 1] = piece
  end

  return tokens
end
 | 
						|
 | 
						|
--- Parse the header part of a `header` rule (e.g. `From:addr|To:name:raw`).
--
-- The definition is a '|'-separated list of headers, each optionally followed
-- by ':'-separated modifiers (`addr`, `name`, `raw`, `case`).
--
-- Results are stored in `cur_rule`:
--   cur_rule.header   - list of { header, strong, raw, ['function'] } tables,
--                       one per accepted header
--   cur_rule.ordinary - true only for exactly one plain header with no
--                       modifiers and no special name (ALL, MESSAGEID, ToCc)
--
-- @param hline header definition taken from the rule line
-- @param cur_rule rule table being filled in (read for `symbol` in warnings)
local function handle_header_def(hline, cur_rule)
  -- Build a function that extracts one field (`addr` or `name`) from every
  -- address returned by util.parse_addr.
  local function addr_field_extractor(field)
    return function(str)
      local ret = {}
      local addr_parsed = util.parse_addr(str)

      if addr_parsed then
        for _, elt in ipairs(addr_parsed) do
          if elt[field] then
            ret[#ret + 1] = elt[field]
          end
        end
      end

      return ret
    end
  end

  local hdr_params = {}
  -- Check if an re is an ordinary re
  local ordinary = true

  for _, h in ipairs(split(hline, '[^|]+')) do
    if h == 'ALL' or h == 'ALL:raw' then
      ordinary = false
    else
      local args = split(h, '[^:]+')
      -- A fresh table per header: sharing one table between iterations would
      -- make every entry of hdr_params alias the last header parsed.
      local cur_param = {
        strong = false,
        raw = false,
        header = args[1],
      }

      if not args[1] or args[2] then
        -- Either there is no header name at all, or some ops are required
        -- for the header, so it's not ordinary
        ordinary = false
      end

      -- Everything after the header name is a modifier
      for i = 2, #args do
        local func = args[i]

        if func == 'addr' or func == 'name' then
          cur_param['function'] = addr_field_extractor(func)
        elseif func == 'raw' then
          cur_param['raw'] = true
        elseif func == 'case' then
          cur_param['strong'] = true
        else
          rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
            func, cur_rule['symbol'])
        end
      end

      -- Some header rules require splitting to check of multiple headers
      if cur_param['header'] == 'MESSAGEID' or cur_param['header'] == 'ToCc' then
        -- Special cases for spamassassin
        ordinary = false
      else
        table.insert(hdr_params, cur_param)
      end
    end
  end

  -- Assigned after the loop so the fields are always set, even for an empty
  -- header list (which is then reported as not ordinary).
  cur_rule['ordinary'] = ordinary and #hdr_params == 1
  cur_rule['header'] = hdr_params
end
 | 
						|
 | 
						|
--- Parse one SpamAssassin configuration stream.
--
-- Every non-empty, non-comment line ends up in exactly one place:
--   * simple regexp rules (header / body / rawbody / full / uri) are stored
--     in `rules`, together with the `describe` line of the same symbol;
--   * `score` lines are stored in `scores`;
--   * everything else (conditionals and their bodies, meta rules, negated or
--     non-trivial header rules, rules whose regexp cannot be compiled, ...)
--     is appended once to `complicated`, after trimming and `=~` / `!~`
--     spacing normalisation.
--
-- @param f open readable file handle; only `f:lines()` is used and the handle
--          is not closed here
local function process_sa_conf(f)
  -- Rule being assembled. It is committed to `rules` when the next rule
  -- starts (or at end of input) so a following `describe` can attach to it.
  local cur_rule = {}
  local valid_rule = false
  -- State of the if/else/endif tracker
  local skip_to_endif = false
  local if_nested = 0

  -- Rule keyword -> type tag used as the key in `rules`. `full` rules are
  -- tagged 'full' because that is the tag handle_rule dispatches on.
  local body_types = {
    body = 'sabody',
    rawbody = 'sarawbody',
    full = 'full',
  }

  -- Keep a line for the residual SpamAssassin config.
  local function keep(line)
    table.insert(complicated, line)
  end

  -- Commit the current rule to `rules` and start from a clean slate.
  local function insert_cur_rule()
    local rule = cur_rule
    cur_rule = {}
    valid_rule = false

    local hdr_name
    if rule.type == 'header' then
      local first = rule.header and rule.header[1]
      hdr_name = first and first.header
    end

    -- Validate before touching `rules` so no empty sub-tables are left behind
    if not rule.type or not rule.symbol
        or (rule.type == 'header' and not hdr_name) then
      rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', rule)
      return
    end

    local target = rules[rule.type]
    if not target then
      target = {}
      rules[rule.type] = target
    end

    if hdr_name then
      -- Header rules are grouped one level deeper, by header name
      if not target[hdr_name] then
        target[hdr_name] = {}
      end
      target = target[hdr_name]
    end

    target[rule.symbol] = rule
  end

  -- Flush the previous rule (if complete) and drop any leftovers of a rule
  -- that was rejected half-way through.
  local function begin_rule()
    if valid_rule then
      insert_cur_rule()
    end
    cur_rule = {}
    valid_rule = false
  end

  -- Compile cur_rule.re_expr. On failure the line is kept for SpamAssassin
  -- instead of being dropped silently.
  local function compile_re(line)
    cur_rule.re = rspamd_regexp.create(cur_rule.re_expr)
    if cur_rule.re then
      return true
    end

    rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
      cur_rule.re_expr, cur_rule.symbol)
    keep(line)
    return false
  end

  local function parse_score(words)
    local raw
    if #words == 3 then
      -- score rule <x>
      raw = words[3]
    elseif #words == 6 then
      -- score rule <x1> <x2> <x3> <x4>
      -- we assume here that bayes and network are enabled and select <x4>
      raw = words[6]
    end

    local value = raw and tonumber(raw)
    if not value then
      rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
      return 0
    end

    return value
  end

  -- Track if/else/endif nesting. Returns true when the line was consumed,
  -- i.e. it is a conditional directive or lies inside a skipped block.
  local function handle_conditional(l)
    -- Unbalanced if/endif
    if if_nested < 0 then if_nested = 0 end

    if skip_to_endif then
      if string.match(l, '^endif') then
        if_nested = if_nested - 1

        if if_nested <= 0 then
          skip_to_endif = false
        end
      elseif string.match(l, '^if') then
        if_nested = if_nested + 1
      elseif string.match(l, '^else') and if_nested <= 1 then
        -- Else counterpart for the outermost if; an else that belongs to a
        -- nested if must not end the skipping of the enclosing block
        skip_to_endif = false
      end

      keep(l)
      return true
    end

    if string.match(l, '^if') then
      -- ifplugin, if !plugin(...) and any other if are all treated the same:
      -- the guarded block is left to SpamAssassin
      skip_to_endif = true
      if_nested = if_nested + 1
    elseif string.match(l, '^else') then
      -- Else counterpart for if
      skip_to_endif = true
    elseif string.match(l, '^endif') then
      if_nested = if_nested - 1
    else
      return false
    end

    -- The directive is recorded exactly once; falling through to the keyword
    -- dispatch would append it a second time
    keep(l)
    return true
  end

  local function process_line(l)
    l = lua_util.rspamd_str_trim(l)
    -- Replace bla=~/re/ with bla =~ /re/ (#2372)
    l = l:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3')

    if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then
      return
    end

    if handle_conditional(l) then
      return
    end

    -- Tokenise, stopping at the first token that starts a comment
    local words = {}
    for _, w in ipairs(split(l)) do
      if string.sub(w, 1, 1) == '#' then
        break
      end
      words[#words + 1] = w
    end

    local kind = words[1]

    if kind == 'header' then
      -- header SYMBOL Header =~ /regexp/
      begin_rule()

      local op = words[4]
      if op ~= '=~' and op ~= '!~' then
        keep(l)
        return
      end

      cur_rule['type'] = 'header'
      cur_rule['symbol'] = words[2]

      if op == '!~' then
        keep(l)
        return
      end

      cur_rule['re_expr'] = words_to_re(words, 4)
      if string.find(cur_rule['re_expr'], '%s+%[if%-unset:') then
        keep(l)
        return
      end

      if not compile_re(l) then
        return
      end

      handle_header_def(words[3], cur_rule)
      if not cur_rule['ordinary'] then
        keep(l)
        return
      end

      valid_rule = true
    elseif body_types[kind] then
      -- body|rawbody|full SYMBOL /regexp/
      begin_rule()
      cur_rule['symbol'] = words[2]

      local first_char = words[3] and string.sub(words[3], 1, 1)
      if first_char ~= '/' and first_char ~= 'm' then
        -- might be function
        keep(l)
        return
      end

      cur_rule['type'] = body_types[kind]
      cur_rule['re_expr'] = words_to_re(words, 2)
      if kind == 'full' then
        cur_rule['raw'] = true
      end

      if not compile_re(l) then
        return
      end

      valid_rule = true
    elseif kind == 'uri' then
      -- uri SYMBOL /regexp/
      begin_rule()
      cur_rule['type'] = 'uri'
      cur_rule['symbol'] = words[2]
      cur_rule['re_expr'] = words_to_re(words, 2)

      if not compile_re(l) then
        return
      end

      valid_rule = true
    elseif kind == 'meta' then
      -- meta SYMBOL expression
      begin_rule()
      keep(l)
    elseif kind == 'describe' and valid_rule
        and words[2] == cur_rule['symbol'] then
      -- Only the description of the rule being assembled is absorbed; a
      -- describe line for any other symbol stays in the residual config
      cur_rule['description'] = words_to_re(words, 2)
    elseif kind == 'score' and words[2] then
      -- NOTE(review): score lines are always consumed here, including those
      -- of rules that end up in `complicated` - confirm this is intended
      scores[words[2]] = parse_score(words)
    else
      keep(l)
    end
  end

  for l in f:lines() do
    process_line(l)
  end

  if valid_rule then
    insert_cur_rule()
  end
end
 | 
						|
 | 
						|
-- Parse every file named on the command line. Files that cannot be opened
-- are reported and skipped.
for _,matched in ipairs(arg) do
  local f = io.open(matched, "r")
  if f then
    rspamd_logger.messagex(rspamd_config, 'loading SA rules from %s', matched)
    process_sa_conf(f)
    -- Release the handle as soon as the file is parsed instead of leaving it
    -- to the garbage collector
    f:close()
  else
    rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
  end
end
 | 
						|
 | 
						|
-- Generated multimap definitions, keyed by lower-cased map name; filled by
-- handle_rule and serialised to auto_multimap.conf at the end of the script.
local multimap_conf = {}
 | 
						|
 | 
						|
--- Write one regexp map file for a group of converted rules and register
-- the matching multimap definition in `multimap_conf`.
--
-- Each map line has the form `/<regexp>/ <SYMBOL>:<score>`; symbols without
-- an entry in `scores` get a score of 0.
--
-- @param what rule type: 'sabody', 'sarawbody', 'full' (or its alias
--             'message'), 'uri' or 'header'
-- @param syms table mapping symbol name -> rule (only `rule.re` is used)
-- @param hdr header name; required when `what` is 'header'
local function handle_rule(what, syms, hdr)
  -- `full` rules may arrive tagged as 'message'; treat that as an alias
  -- instead of discarding them as an unknown type
  if what == 'message' then
    what = 'full'
  end

  local mtype, filter, fname, header
  local sym = what:upper()

  if what == 'sabody' then
    mtype = 'content'
    fname = 'body_re.map'
    filter = 'oneline'
  elseif what == 'sarawbody' then
    fname = 'raw_body_re.map'
    mtype = 'content'
    filter = 'rawtext'
  elseif what == 'full' then
    fname = 'full_re.map'
    mtype = 'content'
    filter = 'full'
  elseif what == 'uri' then
    fname = 'uri_re.map'
    mtype = 'url'
    filter = 'full'
  elseif what == 'header' then
    fname = ('hdr_' .. hdr .. '_re.map'):lower()
    mtype = 'header'
    header = hdr
    sym = sym .. '_' .. hdr:upper()
  else
    rspamd_logger.errx('unknown type: %s', what)
    return
  end

  local conf = {
    type = mtype,
    filter = filter,
    symbol = 'SA_MAP_AUTO_' .. sym,
    regexp = true,
    map = fname,
    header = header,
    symbols = {}
  }

  local re_file, err = io.open(fname, 'w')
  if not re_file then
    -- Report the failure instead of crashing on a nil file handle
    rspamd_logger.errx('cannot open %s for writing: %s', fname, err)
    return
  end

  -- pairs() order is unspecified; sort the symbols so that repeated runs
  -- produce identical map files and symbol lists
  local names = {}
  for k in pairs(syms) do
    names[#names + 1] = k
  end
  table.sort(names)

  for _, k in ipairs(names) do
    local score = scores[k] or 0.0
    re_file:write(string.format('/%s/ %s:%f\n', tostring(syms[k].re), k, score))
    table.insert(conf.symbols, k)
  end

  re_file:close()

  multimap_conf[sym:lower()] = conf
  rspamd_logger.messagex('stored %s regexp in %s', sym:lower(), fname)
end
 | 
						|
 | 
						|
-- Emit one map per rule type. Header rules are stored one level deeper
-- (grouped by header name), so they produce one map per header.
for rule_type, group in pairs(rules) do
  if rule_type ~= 'header' then
    handle_rule(rule_type, group)
  else
    for hdr_name, hdr_rules in pairs(group) do
      handle_rule(rule_type, hdr_rules, hdr_name)
    end
  end
end
 | 
						|
 | 
						|
-- Serialise the collected multimap definitions in UCL format.
local out = ucl.to_format(multimap_conf, 'ucl')
-- assert() turns a failed open into an error carrying the system message
-- rather than a nil-index failure on the next line
local mmap_conf = assert(io.open('auto_multimap.conf', 'w'))
mmap_conf:write(out)
mmap_conf:close()
rspamd_logger.messagex('stored multimap conf in %s', 'auto_multimap.conf')
 | 
						|
 | 
						|
-- Write every line that was not converted back out as a SpamAssassin
-- config, one line per entry, skipping whitespace-only entries.
-- assert() turns a failed open into an error carrying the system message.
local sa_remain = assert(io.open('auto_sa.conf', 'w'))
for _, l in ipairs(complicated) do
  if not string.match(l, '^%s+$') then
    sa_remain:write(l, '\n')
  end
end
sa_remain:close()
rspamd_logger.messagex('stored sa remains conf in %s', 'auto_sa.conf')
 |