public static String clean(String content) { if(content==null) return content; // unencode (X)HTML entities content = HtmlEscape.unescapeHtml(content); // clean HTML tags, preserving and inducing new line breaks Document document = Jsoup.parse(content); document.outputSettings(new Document.OutputSettings().prettyPrint(false)); document.select("p").prepend("\\n\\n"); document.select("br").append("\\n"); document.select("h1").prepend("\\n\\n"); document.select("h2").prepend("\\n\\n"); document.select("h3").prepend("\\n\\n"); document.select("h4").prepend("\\n\\n"); document.select("h5").prepend("\\n\\n"); document.select("h6").prepend("\\n\\n"); content = document.html().replaceAll("\\\\n", "\n"); content = Jsoup.clean(content, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)); // unencode jsoup induced entities content = HtmlEscape.unescapeHtml(content); // normalize newlines content = content.replace("\r\n", "\n"); content = content.replace("\r", "\n"); content = content.replaceAll("(\n){2,}", "\n\n"); return content; }