Skip to content

Commit 7aaba7d

Browse files
committed
Add handling for control characters in site tree exports
Signed-off-by: Nilson Magalhaes Junior <[email protected]>
1 parent c987a4c commit 7aaba7d

File tree

3 files changed

+297
-8
lines changed

3 files changed

+297
-8
lines changed

addOns/exim/CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
1818
- Maintenance changes.
1919

2020
### Fixed
21+
- Sites Tree export now correctly handles node names with newlines and special characters (Issue 8858).
2122
- Import HAR entry sent and elapsed time.
2223
- Duplicate or missing "Save URLs..." entries in the Export menu.
2324
- The "Save All URLs..." export option was saving only the selected URLs.

addOns/exim/src/main/java/org/zaproxy/addon/exim/sites/SitesTreeHandler.java

+115-8
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,26 @@
2626
import java.io.IOException;
2727
import java.io.InputStream;
2828
import java.io.Writer;
29+
import java.nio.charset.StandardCharsets;
2930
import java.util.ArrayList;
31+
import java.util.Base64;
32+
import java.util.Collection;
3033
import java.util.LinkedHashMap;
3134
import java.util.List;
35+
import java.util.Map;
3236
import org.apache.commons.httpclient.URI;
37+
import org.apache.commons.httpclient.URIException;
3338
import org.apache.logging.log4j.LogManager;
3439
import org.apache.logging.log4j.Logger;
3540
import org.parosproxy.paros.Constant;
41+
import org.parosproxy.paros.db.DatabaseException;
3642
import org.parosproxy.paros.model.HistoryReference;
3743
import org.parosproxy.paros.model.Model;
3844
import org.parosproxy.paros.model.SiteMap;
3945
import org.parosproxy.paros.model.SiteNode;
4046
import org.parosproxy.paros.network.HtmlParameter.Type;
4147
import org.parosproxy.paros.network.HttpHeader;
48+
import org.parosproxy.paros.network.HttpMalformedHeaderException;
4249
import org.parosproxy.paros.network.HttpMessage;
4350
import org.parosproxy.paros.network.HttpRequestHeader;
4451
import org.yaml.snakeyaml.DumperOptions;
@@ -57,12 +64,23 @@ public class SitesTreeHandler {
5764
private static final Yaml YAML;
5865

5966
static {
60-
// YAML is used for encoding
67+
// YAML is used for encoding with improved configuration
6168
DumperOptions options = new DumperOptions();
6269
options.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);
6370
options.setPrettyFlow(true);
71+
options.setIndent(2);
72+
options.setIndicatorIndent(0);
73+
options.setWidth(Integer.MAX_VALUE); // Prevent wrapping
74+
options.setAllowUnicode(true); // Better Unicode handling
75+
options.setNonPrintableStyle(
76+
DumperOptions.NonPrintableStyle.ESCAPE); // Escape problematic chars
77+
6478
Representer representer = new Representer(options);
6579
representer.setDefaultScalarStyle(DumperOptions.ScalarStyle.DOUBLE_QUOTED);
80+
81+
// For handling special chars
82+
representer.getPropertyUtils().setSkipMissingProperties(true);
83+
6684
YAML = new Yaml(representer, options);
6785
}
6886

@@ -94,12 +112,32 @@ private static void outputKV(
94112
}
95113
fw.write(key);
96114
fw.write(": ");
97-
fw.write(YAML.dump(value));
115+
116+
// Convert value to YAML and handle formatting
117+
Object sanitizedValue = sanitizeForYaml(value);
118+
String yamlValue = YAML.dump(sanitizedValue).trim();
119+
120+
// For simple single-line values
121+
if (!yamlValue.contains("\n")) {
122+
fw.write(yamlValue);
123+
fw.newLine(); // Add consistent newline
124+
} else {
125+
// For multi-line values, handle indentation
126+
fw.newLine(); // Start value on next line
127+
String extraIndent = indent + (first ? "- " : " ").replaceAll("\\.", " ") + " ";
128+
String[] lines = yamlValue.split("\n");
129+
for (String line : lines) {
130+
fw.write(extraIndent);
131+
fw.write(line);
132+
fw.newLine();
133+
}
134+
}
98135
}
99136

100137
private static void outputNode(
101138
BufferedWriter fw, SiteNode node, int level, ExporterResult result) throws IOException {
102-
// We could create a set of data structures and use snakeyaml, but the format is very simple
139+
// We could create a set of data structures and use snakeyaml, but the format is
140+
// very simple
103141
// and this is much more memory efficient - it still uses snakeyaml for encoding
104142
String indent = " ".repeat(level * 2);
105143
HistoryReference href = node.getHistoryReference();
@@ -144,7 +182,7 @@ private static void outputNode(
144182
});
145183
outputKV(fw, indent, false, EximSiteNode.DATA_KEY, sb.toString());
146184
}
147-
} catch (Exception e) {
185+
} catch (IOException | DatabaseException e) {
148186
LOGGER.error(e.getMessage(), e);
149187
}
150188
}
@@ -213,7 +251,7 @@ public static void pruneSiteNodes(EximSiteNode node, PruneSiteResult result, Sit
213251
sn.getChildCount());
214252
}
215253
}
216-
} catch (Exception e) {
254+
} catch (NullPointerException | URIException | HttpMalformedHeaderException e) {
217255
LOGGER.error(e.getMessage(), e);
218256
}
219257
}
@@ -233,18 +271,87 @@ public static PruneSiteResult pruneSiteNodes(File file) {
233271

234272
protected static PruneSiteResult pruneSiteNodes(InputStream is, SiteMap siteMap) {
235273
PruneSiteResult res = new PruneSiteResult();
236-
// Don't load yaml using the Constructor class - that throws exceptions that don't give
274+
// Don't load yaml using the Constructor class - that throws exceptions that
275+
// don't give
237276
// enough info
238277
Yaml yaml = new Yaml(new LoaderOptions());
239278

240279
Object obj = yaml.load(is);
241-
if (obj instanceof ArrayList<?>) {
242-
ArrayList<?> list = (ArrayList<?>) obj;
280+
if (obj instanceof ArrayList<?> list) {
243281
EximSiteNode rootNode = new EximSiteNode((LinkedHashMap<?, ?>) list.get(0));
244282
pruneSiteNodes(rootNode, res, siteMap);
245283
} else {
246284
res.setError(Constant.messages.getString("exim.sites.error.prune.badformat"));
247285
}
248286
return res;
249287
}
288+
289+
private static Object sanitizeForYaml(Object value) {
290+
if (value == null) {
291+
return "";
292+
}
293+
294+
if (value instanceof String strValue) {
295+
296+
// Remove control characters that might break YAML
297+
strValue = strValue.replaceAll("[\\p{Cntrl}&&[^\r\n\t]]", "");
298+
299+
// Handle known problematic sequences
300+
strValue = strValue.replace("\u0000", "");
301+
302+
// For especially problematic strings, consider Base64 encoding
303+
if (containsProhibitedYamlCharacters(strValue)) {
304+
return Base64.getEncoder()
305+
.encodeToString(strValue.getBytes(StandardCharsets.UTF_8));
306+
}
307+
308+
return strValue;
309+
} else if (value instanceof Map) {
310+
// Process map values recursively
311+
Map<Object, Object> sanitizedMap = new LinkedHashMap<>();
312+
((Map<?, ?>) value)
313+
.forEach((k, v) -> sanitizedMap.put(sanitizeForYaml(k), sanitizeForYaml(v)));
314+
return sanitizedMap;
315+
} else if (value instanceof Collection) {
316+
// Process collection values recursively
317+
List<Object> sanitizedList = new ArrayList<>();
318+
((Collection<?>) value).forEach(item -> sanitizedList.add(sanitizeForYaml(item)));
319+
return sanitizedList;
320+
}
321+
322+
// For other types, return as is
323+
return value;
324+
}
325+
326+
private static boolean containsProhibitedYamlCharacters(String inputText) {
327+
// Character code constants
328+
final int TAB = 9;
329+
final int LINE_FEED = 10;
330+
final int CARRIAGE_RETURN = 13;
331+
final int CONTROL_CHARS_UPPER_BOUND = 32;
332+
final int LINE_SEPARATOR = 0x2028;
333+
final int PARAGRAPH_SEPARATOR = 0x2029;
334+
final int BYTE_ORDER_MARK = 0xFEFF;
335+
final int SURROGATE_PAIR_START = 0xD800;
336+
final int SURROGATE_PAIR_END = 0xDFFF;
337+
338+
// Check for characters known to cause YAML issues
339+
return inputText
340+
.chars()
341+
.anyMatch(
342+
characterCode ->
343+
(characterCode < CONTROL_CHARS_UPPER_BOUND
344+
&& characterCode != TAB
345+
&& characterCode != LINE_FEED
346+
&& characterCode != CARRIAGE_RETURN)
347+
|| // Control chars except tab, LF, CR
348+
(characterCode == LINE_SEPARATOR)
349+
|| (characterCode == PARAGRAPH_SEPARATOR)
350+
|| // Line/paragraph separators
351+
(characterCode == BYTE_ORDER_MARK)
352+
|| // BOM (Byte Order Mark)
353+
(characterCode >= SURROGATE_PAIR_START
354+
&& characterCode
355+
<= SURROGATE_PAIR_END)); // Surrogate pairs
356+
}
250357
}

0 commit comments

Comments
 (0)