26
26
import java .io .IOException ;
27
27
import java .io .InputStream ;
28
28
import java .io .Writer ;
29
+ import java .nio .charset .StandardCharsets ;
29
30
import java .util .ArrayList ;
31
+ import java .util .Base64 ;
32
+ import java .util .Collection ;
30
33
import java .util .LinkedHashMap ;
31
34
import java .util .List ;
35
+ import java .util .Map ;
32
36
import org .apache .commons .httpclient .URI ;
37
+ import org .apache .commons .httpclient .URIException ;
33
38
import org .apache .logging .log4j .LogManager ;
34
39
import org .apache .logging .log4j .Logger ;
35
40
import org .parosproxy .paros .Constant ;
41
+ import org .parosproxy .paros .db .DatabaseException ;
36
42
import org .parosproxy .paros .model .HistoryReference ;
37
43
import org .parosproxy .paros .model .Model ;
38
44
import org .parosproxy .paros .model .SiteMap ;
39
45
import org .parosproxy .paros .model .SiteNode ;
40
46
import org .parosproxy .paros .network .HtmlParameter .Type ;
41
47
import org .parosproxy .paros .network .HttpHeader ;
48
+ import org .parosproxy .paros .network .HttpMalformedHeaderException ;
42
49
import org .parosproxy .paros .network .HttpMessage ;
43
50
import org .parosproxy .paros .network .HttpRequestHeader ;
44
51
import org .yaml .snakeyaml .DumperOptions ;
@@ -57,12 +64,23 @@ public class SitesTreeHandler {
57
64
/** Shared SnakeYAML instance, used for encoding values. */
private static final Yaml YAML;

static {
    // YAML is used for encoding
    DumperOptions dumperOptions = new DumperOptions();

    // Layout: block style, two-space indent, indicators flush with their parent.
    dumperOptions.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);
    dumperOptions.setPrettyFlow(true);
    dumperOptions.setIndent(2);
    dumperOptions.setIndicatorIndent(0);

    // Never fold long scalars onto several lines.
    dumperOptions.setWidth(Integer.MAX_VALUE);

    // Emit Unicode as-is and escape non-printable characters.
    dumperOptions.setAllowUnicode(true);
    dumperOptions.setNonPrintableStyle(DumperOptions.NonPrintableStyle.ESCAPE);

    Representer quotingRepresenter = new Representer(dumperOptions);
    quotingRepresenter.setDefaultScalarStyle(DumperOptions.ScalarStyle.DOUBLE_QUOTED);

    // NOTE(review): skipping missing properties concerns mapping YAML onto Java bean
    // properties rather than how characters are escaped - confirm it is needed here.
    quotingRepresenter.getPropertyUtils().setSkipMissingProperties(true);

    YAML = new Yaml(quotingRepresenter, dumperOptions);
}
68
86
@@ -94,12 +112,32 @@ private static void outputKV(
94
112
}
95
113
fw .write (key );
96
114
fw .write (": " );
97
- fw .write (YAML .dump (value ));
115
+
116
+ // Convert value to YAML and handle formatting
117
+ Object sanitizedValue = sanitizeForYaml (value );
118
+ String yamlValue = YAML .dump (sanitizedValue ).trim ();
119
+
120
+ // For simple single-line values
121
+ if (!yamlValue .contains ("\n " )) {
122
+ fw .write (yamlValue );
123
+ fw .newLine (); // Add consistent newline
124
+ } else {
125
+ // For multi-line values, handle indentation
126
+ fw .newLine (); // Start value on next line
127
+ String extraIndent = indent + (first ? "- " : " " ).replaceAll ("\\ ." , " " ) + " " ;
128
+ String [] lines = yamlValue .split ("\n " );
129
+ for (String line : lines ) {
130
+ fw .write (extraIndent );
131
+ fw .write (line );
132
+ fw .newLine ();
133
+ }
134
+ }
98
135
}
99
136
100
137
private static void outputNode (
101
138
BufferedWriter fw , SiteNode node , int level , ExporterResult result ) throws IOException {
102
- // We could create a set of data structures and use snakeyaml, but the format is very simple
139
+ // We could create a set of data structures and use snakeyaml, but the format is
140
+ // very simple
103
141
// and this is much more memory efficient - it still uses snakeyaml for encoding
104
142
String indent = " " .repeat (level * 2 );
105
143
HistoryReference href = node .getHistoryReference ();
@@ -144,7 +182,7 @@ private static void outputNode(
144
182
});
145
183
outputKV (fw , indent , false , EximSiteNode .DATA_KEY , sb .toString ());
146
184
}
147
- } catch (Exception e ) {
185
+ } catch (IOException | DatabaseException e ) {
148
186
LOGGER .error (e .getMessage (), e );
149
187
}
150
188
}
@@ -213,7 +251,7 @@ public static void pruneSiteNodes(EximSiteNode node, PruneSiteResult result, Sit
213
251
sn .getChildCount ());
214
252
}
215
253
}
216
- } catch (Exception e ) {
254
+ } catch (NullPointerException | URIException | HttpMalformedHeaderException e ) {
217
255
LOGGER .error (e .getMessage (), e );
218
256
}
219
257
}
@@ -233,18 +271,87 @@ public static PruneSiteResult pruneSiteNodes(File file) {
233
271
234
272
protected static PruneSiteResult pruneSiteNodes (InputStream is , SiteMap siteMap ) {
235
273
PruneSiteResult res = new PruneSiteResult ();
236
- // Don't load yaml using the Constructor class - that throws exceptions that don't give
274
+ // Don't load yaml using the Constructor class - that throws exceptions that
275
+ // don't give
237
276
// enough info
238
277
Yaml yaml = new Yaml (new LoaderOptions ());
239
278
240
279
Object obj = yaml .load (is );
241
- if (obj instanceof ArrayList <?>) {
242
- ArrayList <?> list = (ArrayList <?>) obj ;
280
+ if (obj instanceof ArrayList <?> list ) {
243
281
EximSiteNode rootNode = new EximSiteNode ((LinkedHashMap <?, ?>) list .get (0 ));
244
282
pruneSiteNodes (rootNode , res , siteMap );
245
283
} else {
246
284
res .setError (Constant .messages .getString ("exim.sites.error.prune.badformat" ));
247
285
}
248
286
return res ;
249
287
}
288
+
289
+ private static Object sanitizeForYaml (Object value ) {
290
+ if (value == null ) {
291
+ return "" ;
292
+ }
293
+
294
+ if (value instanceof String strValue ) {
295
+
296
+ // Remove control characters that might break YAML
297
+ strValue = strValue .replaceAll ("[\\ p{Cntrl}&&[^\r \n \t ]]" , "" );
298
+
299
+ // Handle known problematic sequences
300
+ strValue = strValue .replace ("\u0000 " , "" );
301
+
302
+ // For especially problematic strings, consider Base64 encoding
303
+ if (containsProhibitedYamlCharacters (strValue )) {
304
+ return Base64 .getEncoder ()
305
+ .encodeToString (strValue .getBytes (StandardCharsets .UTF_8 ));
306
+ }
307
+
308
+ return strValue ;
309
+ } else if (value instanceof Map ) {
310
+ // Process map values recursively
311
+ Map <Object , Object > sanitizedMap = new LinkedHashMap <>();
312
+ ((Map <?, ?>) value )
313
+ .forEach ((k , v ) -> sanitizedMap .put (sanitizeForYaml (k ), sanitizeForYaml (v )));
314
+ return sanitizedMap ;
315
+ } else if (value instanceof Collection ) {
316
+ // Process collection values recursively
317
+ List <Object > sanitizedList = new ArrayList <>();
318
+ ((Collection <?>) value ).forEach (item -> sanitizedList .add (sanitizeForYaml (item )));
319
+ return sanitizedList ;
320
+ }
321
+
322
+ // For other types, return as is
323
+ return value ;
324
+ }
325
+
326
+ private static boolean containsProhibitedYamlCharacters (String inputText ) {
327
+ // Character code constants
328
+ final int TAB = 9 ;
329
+ final int LINE_FEED = 10 ;
330
+ final int CARRIAGE_RETURN = 13 ;
331
+ final int CONTROL_CHARS_UPPER_BOUND = 32 ;
332
+ final int LINE_SEPARATOR = 0x2028 ;
333
+ final int PARAGRAPH_SEPARATOR = 0x2029 ;
334
+ final int BYTE_ORDER_MARK = 0xFEFF ;
335
+ final int SURROGATE_PAIR_START = 0xD800 ;
336
+ final int SURROGATE_PAIR_END = 0xDFFF ;
337
+
338
+ // Check for characters known to cause YAML issues
339
+ return inputText
340
+ .chars ()
341
+ .anyMatch (
342
+ characterCode ->
343
+ (characterCode < CONTROL_CHARS_UPPER_BOUND
344
+ && characterCode != TAB
345
+ && characterCode != LINE_FEED
346
+ && characterCode != CARRIAGE_RETURN )
347
+ || // Control chars except tab, LF, CR
348
+ (characterCode == LINE_SEPARATOR )
349
+ || (characterCode == PARAGRAPH_SEPARATOR )
350
+ || // Line/paragraph separators
351
+ (characterCode == BYTE_ORDER_MARK )
352
+ || // BOM (Byte Order Mark)
353
+ (characterCode >= SURROGATE_PAIR_START
354
+ && characterCode
355
+ <= SURROGATE_PAIR_END )); // Surrogate pairs
356
+ }
250
357
}
0 commit comments