De multe ori este important ca lista de utilizatori ai unui site sau de abonati la un newsletter sa fie “clean”. Prin clean ma refer in primul rand la eliminarea adreselor inexistente.
Conform RFC 3463, erorile de tip 5.1.1 reprezinta erori permanente (Bad destination mailbox address). Daca toate serverele ar respecta RFC-ul, totul ar fi foarte simplu, dar din pacate multe servere de mail (SMTP) au propriile coduri si mesaje de eroare. Noi la Newsman.ro incercam sa “construim” o lista de mesaje de eroare parsabile care identifica o adresa de mail inexistenta. Solutia noastra este listata mai jos. Folosim Python, dar in mod sigur mesajele RAW email pot fi parsate si in alte limbaje de programare. De exemplu in PHP5 mesajele email pot fi parsate foarte bine si rapid folosind biblioteca Zend.
Python Bounce Detector:
import email
import re

# Diagnostic-Code fragments that identify a non-existent recipient.
# Plain strings are matched as case-insensitive substrings; compiled patterns
# are applied with ``search``.  The trailing comment on each entry names the
# provider (or "text" for generic wording) where the message was observed.
NO_USER_FOUND_MSGS = [
    "user unknown",  # web.de
    "unknown local part",  # web.de
    "user is unknown",  # gmx.de
    "MAILBOX NOT FOUND",  # aol.com
    "But, your recipient never logged onto their free AIM Mail account.",  # aim.com by AOL
    re.compile(r"554 delivery error: dd.*cannot be delivered.*disabled or discontinued.*yahoo", re.I),  # yahoo
    re.compile(r"554 delivery error: dd This user doesn't have a.*yahoo", re.I),  # yahoo
    "Requested action not taken: mailbox unavailable",  # hotmail
    "550 unrouteable address",  # freenet.de
    "sorry, that mailbox isn't in my list of allowed mailboxes",  # cabanova.com
    "551 not our customer",  # att.net
    "Addressee unknown",  # sbcglobal.net
    "user not found",  # mail.ru
    "invalid mailbox",  # mail.ru
    "No such user.",  # k.ro
    "DOESN'T EXIST",  # k.ro
    "no such mailbox",  # qmail
    "No Such User",  # text
    "No Such mailbox",  # text
    "Unknown user",  # easynet, text
    "Unknown mailbox",  # text
    "Invalid recipient",  # text
    "Recipient unknown",  # text
    "No such email",  # text, apropo.ro
    "No such mail",  # text
    "unknown recipient",  # text
    "No such person at this address",  # as.ro
    "#5.1.0 Address rejected",  # text
    re.compile(r"55\d sorry, no mailbox", re.I),  # text, qmail
    "invalid address",  # text
    "User account is unavailable",  # text
    "Recipient not found",  # text
    "User account is unavailable",  # text
    "does not exist",  # text
    "No account by that name here",  # text
    "Mailbox is inactive",  # text
    "Nonexistent user",  # text
    "no mailbox here by that name",  # text
    "mail not found",  # text
    "Mailbox disabled",  # text
    "No such recipient",  # text
    "5.1.1",  # text - freemail.hu
]


class NoDeliveryStatusException(Exception):
    """Raised when the message has no ``message/delivery-status`` part."""


class WrongDeliveryStatusException(Exception):
    """Raised when the delivery-status part lacks ``Status`` or ``Action``."""


class BounceParser:
    """Extract the delivery status from a raw bounce (DSN) e-mail.

    After a successful :meth:`parse` the instance exposes ``status``,
    ``action`` and ``diagnostic_message`` taken from the
    ``message/delivery-status`` part, and :meth:`isNoUserFound` tells whether
    the bounce reports a non-existent mailbox.
    """

    def __init__(self, raw_msg=None):
        """Create a parser and, when *raw_msg* is given, parse it at once.

        Raises whatever :meth:`parse` raises when *raw_msg* is not ``None``.
        """
        # Initialise every attribute so the accessors are safe to call on an
        # instance that has not parsed anything yet.
        self._parsed = False
        self.status = None
        self.diagnostic_message = None
        self.action = None
        self.matched_msg = None
        self.msg = None
        self.dsmsg = None
        if raw_msg is not None:
            self.parse(raw_msg)

    def parse(self, raw_msg):
        """Parse *raw_msg* and populate status, action and diagnostic message.

        *raw_msg* is the complete raw e-mail as ``str``; ``bytes`` is accepted
        as well when the ``email`` package provides ``message_from_bytes``.

        Raises:
            NoDeliveryStatusException: no top-level part of the message has
                the content type ``message/delivery-status``.
            WrongDeliveryStatusException: the delivery-status part carries no
                ``Status`` or no ``Action`` field.
        """
        self._parsed = False
        self.status = None
        self.diagnostic_message = None
        self.action = None
        # Forget the match of a previously parsed message.
        self.matched_msg = None

        if isinstance(raw_msg, bytes) and hasattr(email, "message_from_bytes"):
            self.msg = email.message_from_bytes(raw_msg)
        else:
            self.msg = email.message_from_string(raw_msg)

        self.dsmsg = None
        # A non-multipart message has a plain string payload and therefore
        # cannot contain a delivery-status part.
        if self.msg.is_multipart():
            for part in self.msg.get_payload():
                if part.get_content_type() == "message/delivery-status":
                    self.dsmsg = part
                    break
        if self.dsmsg is None:
            raise NoDeliveryStatusException("No message/delivery-status part found.")

        if not self.dsmsg.is_multipart():
            self.status = self.dsmsg.get("Status", None)
            self.diagnostic_message = self.dsmsg.get("Diagnostic-Code", None)
            self.action = self.dsmsg.get("Action", None)
        else:
            # The delivery-status part is a list of header blocks; a field
            # found in a later block overrides the one from an earlier block.
            for block in self.dsmsg.get_payload():
                if "Status" in block:
                    self.status = block.get("Status")
                if "Diagnostic-Code" in block:
                    self.diagnostic_message = block.get("Diagnostic-Code", None)
                if "Action" in block:
                    self.action = block.get("Action", None)

        if self.status is None:
            raise WrongDeliveryStatusException("No Status: found in message/delivery-status part.")
        if self.action is None:
            raise WrongDeliveryStatusException("No Action: found in message/delivery-status part.")

        # Surrounding whitespace would defeat the exact comparisons below.
        self.status = self.status.strip()
        self.action = self.action.strip()
        if self.diagnostic_message is not None:
            # Unfold a multi-line header value into a single line.
            self.diagnostic_message = (
                self.diagnostic_message.replace("\r", "").replace("\t", " ").replace("\n", " ")
            )
        self._parsed = True

    def isParsed(self):
        """Return True once a message has been parsed successfully."""
        return self._parsed

    def isFailed(self):
        """Return True when the reported action is ``failed`` (any case)."""
        return self.action is not None and self.action.lower() == "failed"

    def is5xx(self):
        """Return True when the status code starts with ``5.``."""
        return self.status is not None and self.status.startswith("5.")

    def is4xx(self):
        """Return True when the status code starts with ``4.``."""
        return self.status is not None and self.status.startswith("4.")

    def getDiagnosticMessage(self):
        """Return the normalised Diagnostic-Code text, or None if absent."""
        return self.diagnostic_message

    def isNoUserFound(self):
        """Return True when the bounce reports a non-existent mailbox.

        Requires a failed action with a 5.x status.  Status ``5.1.1`` is
        accepted directly; otherwise the diagnostic message is compared with
        ``NO_USER_FOUND_MSGS`` and the first entry that matches is stored for
        :meth:`getMatchedMsg` (the pattern text for compiled patterns).
        """
        if not self.isFailed() or not self.is5xx():
            return False
        if self.status == "5.1.1":
            # Enhanced status code 5.1.1 means "bad destination mailbox
            # address" (RFC 3463), so no message inspection is needed.
            return True
        if self.diagnostic_message is None:
            # Nothing to inspect without a diagnostic message.
            return False

        haystack = self.diagnostic_message.lower().strip()
        for candidate in NO_USER_FOUND_MSGS:
            if isinstance(candidate, str):
                if candidate.lower() in haystack:
                    self.matched_msg = candidate
                    return True
            elif candidate.search(haystack):
                self.matched_msg = candidate.pattern
                return True
        return False

    def getAction(self):
        """Return the Action field of the delivery status."""
        return self.action

    def getStatus(self):
        """Return the Status field of the delivery status."""
        return self.status

    def getMatchedMsg(self):
        """Return the entry matched by the last :meth:`isNoUserFound`, if any."""
        return self.matched_msg


