Dazoot Software

RSS 2.0
Catalin Constantin in Code, How to | Niciun comentariu

Procesare bounce-uri in real time

De multe ori este important ca lista de utilizatori ai unui site sau de abonati la un newsletter sa fie “clean”. Prin clean ma refer in primul rand la eliminarea adreselor inexistente.
Conform RFC 3463 (Enhanced Mail System Status Codes), erorile de tip 5.1.1 reprezinta erori permanente (Bad destination mailbox address). Daca toate serverele ar fi RFC compliant, totul ar fi foarte simplu, dar din pacate multe servere de mail (SMTP) au propriile coduri si mesaje de eroare. Noi la Newsman.ro incercam sa “construim” o lista de mesaje de eroare parsabile care identifica o adresa de mail inexistenta. Solutia noastra este listata mai jos. Folosim Python, dar in mod sigur mesajele email RAW pot fi parsate si in alte limbaje de programare. De exemplu, in PHP5 mesajele email pot fi parsate foarte bine si rapid folosind biblioteca Zend.

Python Bounce Detector:

import email, re
 
# Diagnostic-Code fragments that identify a non-existent mailbox.
#
# Each entry is either a plain string (matched as a case-insensitive
# substring of the diagnostic message) or a compiled regular expression
# (searched in the lower-cased diagnostic message).  The trailing comment on
# each entry names the provider the text was observed at; "text" marks a
# generic wording.  Regex patterns are raw strings so that sequences such as
# ``\d`` are regex escapes rather than (invalid) string escapes.
NO_USER_FOUND_MSGS = [
    "user unknown",  # web.de
    "unknown local part",  # web.de
    "user is unknown",  # gmx.de
    "MAILBOX NOT FOUND",  # aol.com
    "But, your recipient never logged onto their free AIM Mail account.",  # aim.com by AOL
    re.compile(r"554 delivery error: dd.*cannot be delivered.*disabled or discontinued.*yahoo", re.I),  # yahoo
    re.compile(r"554 delivery error: dd This user doesn't have a.*yahoo", re.I),  # yahoo
    "Requested action not taken: mailbox unavailable",  # hotmail
    "550 unrouteable address",  # freenet.de
    "sorry, that mailbox isn't in my list of allowed mailboxes",  # cabanova.com
    "551 not our customer",  # att.net
    "Addressee unknown",  # sbcglobal.net
    "user not found",  # mail.ru
    "invalid mailbox",  # mail.ru
    "No such user.",  # k.ro
    "DOESN'T EXIST",  # k.ro
    "no such mailbox",  # qmail
    "No Such User",  # text
    "No Such mailbox",  # text
    "Unknown user",  # easynet, text
    "Unknown mailbox",  # text
    "Invalid recipient",  # text
    "Recipient unknown",  # text
    "No such email",  # text, apropo.ro
    "No such mail",  # text
    "unknown recipient",  # text
    "No such person at this address",  # as.ro
    "#5.1.0 Address rejected",  # text
    re.compile(r"55\d sorry, no mailbox", re.I),  # text, qmail
    "invalid address",  # text
    "User account is unavailable",  # text
    "Recipient not found",  # text
    "does not exist",  # text
    "No account by that name here",  # text
    "Mailbox is inactive",  # text
    "Nonexistent user",  # text
    "no mailbox here by that name",  # text
    "mail not found",  # text
    "Mailbox disabled",  # text
    "No such recipient",  # text
    "5.1.1",  # text - freemail.hu
]
   
 
class NoDeliveryStatusException(Exception):
    """Raised when a message has no message/delivery-status part."""
 
class WrongDeliveryStatusException(Exception):
    """Raised when the delivery-status part lacks a required field."""
   
class BounceParser:
    """Parse a raw bounce e-mail and decide whether the mailbox does not exist.

    The parser looks for a top-level ``message/delivery-status`` part and
    reads its ``Status``, ``Action`` and ``Diagnostic-Code`` fields.  An
    instance may be reused: every call to :meth:`parse` clears the state left
    by the previous one.

    Attributes set by :meth:`parse`:
        msg: the parsed ``email`` message object.
        dsmsg: the ``message/delivery-status`` part of ``msg``.
        status: value of the ``Status`` field (e.g. ``"5.1.1"``).
        action: value of the ``Action`` field (e.g. ``"failed"``).
        diagnostic_message: value of ``Diagnostic-Code`` collapsed onto a
            single line, or ``None`` when the field is absent.
        matched_msg: the entry of ``NO_USER_FOUND_MSGS`` that matched in the
            last :meth:`isNoUserFound` call (the pattern text for regex
            entries), or ``None``.
    """

    def __init__(self, raw_msg=None):
        """Create a parser and, when *raw_msg* is given, parse it at once."""
        self._reset()
        if raw_msg is not None:
            self.parse(raw_msg)

    def _reset(self):
        """Clear every piece of per-message state."""
        self._parsed = False
        self.msg = None
        self.dsmsg = None
        self.status = None
        self.diagnostic_message = None
        self.action = None
        self.matched_msg = None

    def parse(self, raw_msg):
        """Parse *raw_msg* (the raw message text) and extract the DSN fields.

        Raises:
            NoDeliveryStatusException: the message has no top-level
                ``message/delivery-status`` part.
            WrongDeliveryStatusException: the delivery-status part has no
                ``Status`` or no ``Action`` field.
        """
        self._reset()
        self.msg = email.message_from_string(raw_msg)

        # Only a multipart message can carry a delivery-status part; a
        # non-multipart message has a plain string payload with no parts.
        if self.msg.is_multipart():
            for part in self.msg.get_payload():
                if part.get_content_type() == "message/delivery-status":
                    self.dsmsg = part
                    break
        if self.dsmsg is None:
            raise NoDeliveryStatusException("No message/delivery-status part found.")

        # The email parser represents a delivery-status body as a list of
        # header blocks; fall back to the part's own headers otherwise.
        if self.dsmsg.is_multipart():
            blocks = self.dsmsg.get_payload()
        else:
            blocks = [self.dsmsg]

        # When a field occurs in several blocks, the last occurrence wins.
        for block in blocks:
            if "Status" in block:
                self.status = block.get("Status")
            if "Diagnostic-Code" in block:
                self.diagnostic_message = block.get("Diagnostic-Code")
            if "Action" in block:
                self.action = block.get("Action")

        if self.status is None:
            raise WrongDeliveryStatusException("No Status: found in message/delivery-status part.")
        if self.action is None:
            raise WrongDeliveryStatusException("No Action: found in message/delivery-status part.")
        if self.diagnostic_message is not None:
            # Collapse a folded (multi-line) header value onto one line.
            self.diagnostic_message = (
                self.diagnostic_message.replace("\r", "").replace("\t", " ").replace("\n", " ")
            )

        self._parsed = True

    def _status_code(self):
        """Return the status with surrounding whitespace removed ('' if unset)."""
        if self.status is None:
            return ""
        return self.status.strip()

    def isParsed(self):
        """Return True when the last :meth:`parse` call completed successfully."""
        return self._parsed

    def isFailed(self):
        """Return True when the DSN action is ``failed`` (case-insensitive)."""
        if self.action is None:
            return False
        return self.action.strip().lower() == "failed"

    def is5xx(self):
        """Return True when the status code starts with ``5.``."""
        return self._status_code().startswith("5.")

    def is4xx(self):
        """Return True when the status code starts with ``4.``."""
        return self._status_code().startswith("4.")

    def getDiagnosticMessage(self):
        """Return the single-line ``Diagnostic-Code`` value, or None."""
        return self.diagnostic_message

    def isNoUserFound(self):
        """Return True when the bounce says the destination mailbox does not exist.

        Requires a ``failed`` action and a 5.x status.  Status ``5.1.1`` is
        accepted on its own; otherwise the diagnostic message is compared
        with every entry of ``NO_USER_FOUND_MSGS`` and the first matching
        entry is recorded in ``matched_msg``.
        """
        if not self.isFailed() or not self.is5xx():
            return False

        if self._status_code() == "5.1.1":
            # Enhanced status code X.1.1 is "Bad destination mailbox address"
            # (RFC 3463; RFC 2034 lets SMTP servers return these codes).
            return True

        if self.diagnostic_message is None:
            # Without a diagnostic message there is nothing left to inspect.
            return False

        haystack = self.diagnostic_message.lower().strip()
        for candidate in NO_USER_FOUND_MSGS:
            if isinstance(candidate, str):
                if candidate.lower() in haystack:
                    self.matched_msg = candidate
                    return True
            elif candidate.search(haystack):
                # Compiled regular expression: record its pattern text.
                self.matched_msg = candidate.pattern
                return True

        return False

    def getAction(self):
        """Return the ``Action`` field value, or None."""
        return self.action

    def getStatus(self):
        """Return the ``Status`` field value, or None."""
        return self.status

    def getMatchedMsg(self):
        """Return the entry matched by the last :meth:`isNoUserFound`, or None."""
        return self.matched_msg