// PBHtmlUtils.java
package com.bonitasoft.processbuilder.extension;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utility class providing HTML manipulation methods for email content processing.
* <p>
* This class provides methods to:
* </p>
* <ul>
* <li>Convert plain text to HTML format with selective XSS protection</li>
* <li>Apply email templates by replacing content placeholders</li>
* <li>Sanitize content by removing dangerous script tags and event handlers</li>
* </ul>
* <p>
* This class is designed to be used from Groovy scripts in Bonita processes,
* where the template and DAO operations are handled externally.
* </p>
* <p>
* <strong>Security Note:</strong> This class implements selective XSS protection
* that removes script tags and JavaScript event handlers while preserving
* legitimate HTML tags like {@code <a href="...">} links.
* </p>
*
* @author Bonitasoft
* @since 1.0
*/
public final class PBHtmlUtils {
/**
* Logger for this class.
*/
private static final Logger LOGGER = LoggerFactory.getLogger(PBHtmlUtils.class);
/**
* The placeholder pattern for content substitution in email templates.
* Matches {{content}} with optional whitespace inside the braces.
*/
private static final Pattern CONTENT_PLACEHOLDER_PATTERN = Pattern.compile("\\{\\{\\s*content\\s*\\}\\}");
/**
* Pattern to match and remove script tags and their content.
* Case-insensitive to catch variations like SCRIPT, Script, etc.
* Matches both opening and closing tags with any attributes.
*/
private static final Pattern SCRIPT_TAG_PATTERN = Pattern.compile(
"<script[^>]*>.*?</script>",
Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
/**
* Pattern to match and remove self-closing script tags.
*/
private static final Pattern SCRIPT_SELF_CLOSING_PATTERN = Pattern.compile(
"<script[^>]*/?>",
Pattern.CASE_INSENSITIVE);
/**
* Pattern to match and remove JavaScript event handler attributes.
* Matches attributes like onclick, onload, onerror, onmouseover, etc.
* Handles both double and single quoted values.
*/
private static final Pattern EVENT_HANDLER_PATTERN = Pattern.compile(
"\\s+on\\w+\\s*=\\s*(\"[^\"]*\"|'[^']*'|[^\\s>]*)",
Pattern.CASE_INSENSITIVE);
/**
* Pattern to match javascript: protocol in href attributes.
*/
private static final Pattern JAVASCRIPT_PROTOCOL_PATTERN = Pattern.compile(
"href\\s*=\\s*[\"']?\\s*javascript:[^\"'\\s>]*[\"']?",
Pattern.CASE_INSENSITIVE);
/**
* Default placeholder value when template is missing the content placeholder.
*/
private static final String DEFAULT_PLACEHOLDER = "{{content}}";
/**
* Private constructor to prevent instantiation of this utility class.
*
* @throws UnsupportedOperationException always, to enforce the utility pattern.
*/
private PBHtmlUtils() {
throw new UnsupportedOperationException(
"This is a " + this.getClass().getSimpleName() + " class and cannot be instantiated.");
}
/**
* Converts text content to HTML format with selective XSS protection.
* <p>
* This method performs the following transformations:
* </p>
* <ul>
* <li>Removes script tags and their content (XSS protection)</li>
* <li>Removes JavaScript event handler attributes (onclick, onload, etc.)</li>
* <li>Removes javascript: protocol from href attributes</li>
* <li>Converts literal escape sequences from JSON (\\n, \\r, \\t) to HTML equivalents</li>
* <li>Converts real control characters (\n, \r, \t) to HTML equivalents</li>
* <li>Converts multiple consecutive spaces to non-breaking spaces</li>
* </ul>
* <p>
* <strong>Security Note:</strong> This method uses selective XSS protection instead of
* full HTML escaping. This allows legitimate HTML tags like {@code <a href="...">} to
* be preserved while removing dangerous content like script tags and event handlers.
* </p>
*
* @param text The text to convert to HTML format.
* @return The HTML-formatted text with XSS protection, or null if input is null,
* or empty string if input is empty.
*/
public static String convertTextToHtml(String text) {
if (text == null) {
return null;
}
if (text.isEmpty()) {
return text;
}
String result = text;
// Step 1: Selective XSS protection (instead of full HTML escaping)
result = sanitizeXss(result);
// Step 2: Convert literal sequences (from JSON) - must be done before real control characters
result = result.replace("\\r\\n", "<br/>");
result = result.replace("\\n", "<br/>");
result = result.replace("\\r", "<br/>");
// Step 3: Convert real control characters
result = result.replace("\r\n", "<br/>");
result = result.replace("\n", "<br/>");
result = result.replace("\r", "<br/>");
// Step 4: Convert literal tabs (\\t) and real tabs (\t) to four non-breaking spaces
result = result.replace("\\t", " ");
result = result.replace("\t", " ");
// Step 5: Convert multiple consecutive spaces to preserve formatting
result = preserveMultipleSpaces(result);
LOGGER.debug("Converted text to HTML. Input length: {}, Output length: {}", text.length(), result.length());
return result;
}
/**
* Sanitizes the input string by removing XSS attack vectors.
* <p>
* This method removes:
* </p>
* <ul>
* <li>Script tags and their content</li>
* <li>JavaScript event handler attributes (onclick, onload, onerror, etc.)</li>
* <li>javascript: protocol in href attributes</li>
* </ul>
* <p>
* This allows legitimate HTML tags like anchors ({@code <a href="...">}) to remain
* while protecting against XSS attacks.
* </p>
*
* @param text The text to sanitize.
* @return The sanitized text with XSS vectors removed.
*/
static String sanitizeXss(String text) {
if (text == null || text.isEmpty()) {
return text;
}
String result = text;
// Remove script tags and their content
result = SCRIPT_TAG_PATTERN.matcher(result).replaceAll("");
// Remove self-closing or orphan script tags
result = SCRIPT_SELF_CLOSING_PATTERN.matcher(result).replaceAll("");
// Remove JavaScript event handlers (onclick, onload, onerror, etc.)
result = EVENT_HANDLER_PATTERN.matcher(result).replaceAll("");
// Remove javascript: protocol from href attributes
result = JAVASCRIPT_PROTOCOL_PATTERN.matcher(result).replaceAll("href=\"#\"");
LOGGER.debug("Sanitized XSS content. Input length: {}, Output length: {}", text.length(), result.length());
return result;
}
/**
* Applies an email template by replacing the {{content}} placeholder with the provided content.
* <p>
* The method expects the template to contain a {@code {{content}}} placeholder
* (with optional whitespace inside the braces). The content is inserted at this location.
* </p>
* <p>
* <strong>Important:</strong> This method assumes the content has already been converted
* to HTML format using {@link #convertTextToHtml(String)} if it was plain text.
* </p>
*
* @param template The HTML email template containing the {{content}} placeholder.
* @param content The HTML content to insert into the template.
* @return The complete email HTML with the content inserted, or:
* <ul>
* <li>The original content if template is null or empty</li>
* <li>The template unchanged if content is null</li>
* <li>The template with placeholder replaced by empty string if content is empty</li>
* </ul>
*/
public static String applyEmailTemplate(String template, String content) {
// If template is null or empty, return the content as-is
if (template == null || template.isEmpty()) {
LOGGER.warn("Email template is null or empty. Returning content without template wrapper.");
return content;
}
// If content is null, return the template unchanged
if (content == null) {
LOGGER.warn("Content is null. Returning template with placeholder unchanged.");
return template;
}
// Check if template contains the placeholder
Matcher matcher = CONTENT_PLACEHOLDER_PATTERN.matcher(template);
if (!matcher.find()) {
LOGGER.warn("Template does not contain {{{{content}}}} placeholder. "
+ "Appending content at the end of template.");
return template + content;
}
// Replace the placeholder with the content
String result = matcher.replaceFirst(Matcher.quoteReplacement(content));
LOGGER.debug("Applied email template. Template length: {}, Content length: {}, Result length: {}",
template.length(), content.length(), result.length());
return result;
}
/**
* Prepares email content by converting text to HTML and applying the email template.
* <p>
* This is a convenience method that combines {@link #convertTextToHtml(String)} and
* {@link #applyEmailTemplate(String, String)} in a single call.
* </p>
* <p>
* <strong>Important:</strong> If the email template is null, empty, or does not contain
* the {@code {{content}}} placeholder, this method returns the original text content
* without any HTML conversion or template application.
* </p>
* <p>
* <strong>Usage from Bonita Groovy script:</strong>
* </p>
* <pre>{@code
* // In your Groovy script:
* String finalResult = PBStringUtils.resolveTemplateVariables(originalResult, dataResolver)
*
* // Get the email template from PBConfiguration
* PBConfiguration pbConfiguration = pBConfigurationDAO.findByFullNameAndRefEntityTypeName(
* SmtpType.EMAILTEMPLATE.name(),
* ConfigurationType.SMTP.name()
* )
* String emailTemplateString = pbConfiguration.getConfigValue()
*
* // Apply the template to the content
* String emailBody = PBHtmlUtils.prepareEmailContent(finalResult, emailTemplateString)
* }</pre>
*
* @param textContent The plain text content to be included in the email.
* @param emailTemplate The HTML email template containing {{content}} placeholder.
* @return The complete email HTML ready to be sent, or the original textContent
* if the template is invalid (null, empty, or missing placeholder).
*/
public static String prepareEmailContent(String textContent, String emailTemplate) {
// If template is null, empty, or doesn't contain the placeholder, return content as-is
if (!isValidTemplate(emailTemplate)) {
LOGGER.warn("Email template is invalid (null, empty, or missing {{content}} placeholder). "
+ "Returning original content without HTML conversion.");
return textContent;
}
// Convert text content to HTML format
String htmlContent = convertTextToHtml(textContent);
// Apply the email template
return applyEmailTemplate(emailTemplate, htmlContent);
}
/**
* Checks if the given template is valid for email content processing.
* <p>
* A valid template must be non-null, non-empty, and contain the {@code {{content}}} placeholder.
* </p>
*
* @param template The template to validate.
* @return {@code true} if the template is valid, {@code false} otherwise.
*/
static boolean isValidTemplate(String template) {
if (template == null || template.isEmpty()) {
return false;
}
return CONTENT_PLACEHOLDER_PATTERN.matcher(template).find();
}
/**
* Escapes HTML special characters in the given string.
* <p>
* The following characters are escaped:
* <ul>
* <li>{@code &} becomes {@code &}</li>
* <li>{@code <} becomes {@code <}</li>
* <li>{@code >} becomes {@code >}</li>
* <li>{@code "} becomes {@code "}</li>
* <li>{@code '} becomes {@code '}</li>
* </ul>
* </p>
* <p>
* <strong>Note:</strong> This method is kept for backward compatibility and for cases
* where full HTML escaping is needed. The main {@link #convertTextToHtml(String)} method
* now uses selective XSS protection instead of full escaping.
* </p>
*
* @param text The text to escape.
* @return The escaped text.
*/
static String escapeHtmlSpecialChars(String text) {
if (text == null || text.isEmpty()) {
return text;
}
StringBuilder result = new StringBuilder(text.length() + 16);
for (int i = 0; i < text.length(); i++) {
char c = text.charAt(i);
switch (c) {
case '&':
result.append("&");
break;
case '<':
result.append("<");
break;
case '>':
result.append(">");
break;
case '"':
result.append(""");
break;
case '\'':
result.append("'");
break;
default:
result.append(c);
}
}
return result.toString();
}
/**
* Preserves multiple consecutive spaces by converting them to non-breaking spaces.
* <p>
* In HTML, multiple consecutive spaces are collapsed to a single space.
* This method alternates between regular spaces and non-breaking spaces
* to preserve the visual formatting while remaining HTML-compliant.
* </p>
*
* @param text The text with potential multiple spaces.
* @return The text with multiple spaces preserved for HTML rendering.
*/
static String preserveMultipleSpaces(String text) {
if (text == null || text.isEmpty()) {
return text;
}
StringBuilder result = new StringBuilder(text.length() + 16);
boolean previousWasSpace = false;
for (int i = 0; i < text.length(); i++) {
char c = text.charAt(i);
if (c == ' ') {
if (previousWasSpace) {
result.append(" ");
} else {
result.append(' ');
previousWasSpace = true;
}
} else {
result.append(c);
previousWasSpace = false;
}
}
return result.toString();
}
}