about summary refs log tree commit diff
path: root/nixpkgs/pkgs/development/libraries/expat/2.6.0-fix-tests-flakiness.patch
blob: 9817b1833627f8f226a89e50a6a9dc77cedf6a2b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
diff --git a/lib/internal.h b/lib/internal.h
index cce71e4c..a217b3f9 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -31,7 +31,7 @@
    Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
    Copyright (c) 2018      Yury Gribov <tetra2005@gmail.com>
    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
-   Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
+   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
    Licensed under the MIT license:
 
    Permission is  hereby granted,  free of charge,  to any  person obtaining
@@ -162,7 +162,7 @@ const char *unsignedCharToPrintable(unsigned char c);
 #endif
 
 extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
-extern unsigned int g_parseAttempts;             // used for testing only
+extern unsigned int g_bytesScanned;              // used for testing only
 
 #ifdef __cplusplus
 }
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index aaf0fa9c..6de99d99 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -38,7 +38,7 @@
    Copyright (c) 2022      Jann Horn <jannh@google.com>
    Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
    Copyright (c) 2023      Owain Davies <owaind@bath.edu>
-   Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
+   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
    Licensed under the MIT license:
 
    Permission is  hereby granted,  free of charge,  to any  person obtaining
@@ -630,7 +630,7 @@ static unsigned long getDebugLevel(const char *variableName,
        : ((*((pool)->ptr)++ = c), 1))
 
 XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
-unsigned int g_parseAttempts = 0;                    // used for testing only
+unsigned int g_bytesScanned = 0;                     // used for testing only
 
 struct XML_ParserStruct {
   /* The first member must be m_userData so that the XML_GetUserData
@@ -1017,7 +1017,7 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
       return XML_ERROR_NONE;
     }
   }
-  g_parseAttempts += 1;
+  g_bytesScanned += (unsigned)have_now;
   const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
   if (ret == XML_ERROR_NONE) {
     // if we consumed nothing, remember what we had on this parse attempt.
diff --git a/tests/basic_tests.c b/tests/basic_tests.c
index 7112a440..a9cc3861 100644
--- a/tests/basic_tests.c
+++ b/tests/basic_tests.c
@@ -5202,13 +5202,7 @@ START_TEST(test_nested_entity_suspend) {
 END_TEST
 
 /* Regression test for quadratic parsing on large tokens */
-START_TEST(test_big_tokens_take_linear_time) {
-  const char *const too_slow_failure_message
-      = "Compared to the baseline runtime of the first test, this test has a "
-        "slowdown of more than <max_slowdown>. "
-        "Please keep increasing the value by 1 until it reliably passes the "
-        "test on your hardware and open a bug sharing that number with us. "
-        "Thanks in advance!";
+START_TEST(test_big_tokens_scale_linearly) {
   const struct {
     const char *pre;
     const char *post;
@@ -5220,65 +5214,57 @@ START_TEST(test_big_tokens_take_linear_time) {
       {"<e><", "/></e>"},                   // big elem name, used to be O(N²)
   };
   const int num_cases = sizeof(text) / sizeof(text[0]);
-  // For the test we need a <max_slowdown> value that is:
-  // (1) big enough that the test passes reliably (avoiding flaky tests), and
-  // (2) small enough that the test actually catches regressions.
-  const int max_slowdown = 15;
   char aaaaaa[4096];
   const int fillsize = (int)sizeof(aaaaaa);
   const int fillcount = 100;
+  const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
+  const unsigned max_factor = 4;
+  const unsigned max_scanned = max_factor * approx_bytes;
 
   memset(aaaaaa, 'a', fillsize);
 
   if (! g_reparseDeferralEnabledDefault) {
     return; // heuristic is disabled; we would get O(n^2) and fail.
   }
-#if ! defined(__linux__)
-  if (CLOCKS_PER_SEC < 100000) {
-    // Skip this test if clock() doesn't have reasonably good resolution.
-    // This workaround is primarily targeting Windows and FreeBSD, since
-    // XSI requires the value to be 1.000.000 (10x the condition here), and
-    // we want to be very sure that at least one platform in CI can catch
-    // regressions (through a failing test).
-    return;
-  }
-#endif
 
-  clock_t baseline = 0;
   for (int i = 0; i < num_cases; ++i) {
     XML_Parser parser = XML_ParserCreate(NULL);
     assert_true(parser != NULL);
     enum XML_Status status;
-    set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
-                text[i].pre, text[i].post);
-    const clock_t start = clock();
+    set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post);
 
     // parse the start text
+    g_bytesScanned = 0;
     status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
                                      (int)strlen(text[i].pre), XML_FALSE);
     if (status != XML_STATUS_OK) {
       xml_failure(parser);
     }
+
     // parse lots of 'a', failing the test early if it takes too long
+    unsigned past_max_count = 0;
     for (int f = 0; f < fillcount; ++f) {
       status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
       if (status != XML_STATUS_OK) {
         xml_failure(parser);
       }
-      // i == 0 means we're still calculating the baseline value
-      if (i > 0) {
-        const clock_t now = clock();
-        const clock_t clocks_so_far = now - start;
-        const int slowdown = clocks_so_far / baseline;
-        if (slowdown >= max_slowdown) {
-          fprintf(
-              stderr,
-              "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
-              f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
-          fail(too_slow_failure_message);
-        }
+      if (g_bytesScanned > max_scanned) {
+        // We're not done, and have already passed the limit -- the test will
+        // definitely fail. This block allows us to save time by failing early.
+        const unsigned pushed
+            = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize;
+        fprintf(
+            stderr,
+            "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
+            f + 1, fillcount, pushed, g_bytesScanned,
+            g_bytesScanned / (double)pushed, max_scanned, max_factor);
+        past_max_count++;
+        // We are failing, but allow a few log prints first. If we don't reach
+        // a count of five, the test will fail after the loop instead.
+        assert_true(past_max_count < 5);
       }
     }
+
     // parse the end text
     status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
                                      (int)strlen(text[i].post), XML_TRUE);
@@ -5286,18 +5272,14 @@ START_TEST(test_big_tokens_take_linear_time) {
       xml_failure(parser);
     }
 
-    // how long did it take in total?
-    const clock_t end = clock();
-    const clock_t taken = end - start;
-    if (i == 0) {
-      assert_true(taken > 0); // just to make sure we don't div-by-0 later
-      baseline = taken;
-    }
-    const int slowdown = taken / baseline;
-    if (slowdown >= max_slowdown) {
-      fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
-              (int)taken, (int)baseline, slowdown, max_slowdown);
-      fail(too_slow_failure_message);
+    assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
+    if (g_bytesScanned > max_scanned) {
+      fprintf(
+          stderr,
+          "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
+          g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
+          max_factor);
+      fail("scanned too many bytes");
     }
 
     XML_ParserFree(parser);
@@ -5774,19 +5756,17 @@ START_TEST(test_varying_buffer_fills) {
                 fillsize[2], fillsize[3]);
     XML_Parser parser = XML_ParserCreate(NULL);
     assert_true(parser != NULL);
-    g_parseAttempts = 0;
 
     CharData storage;
     CharData_Init(&storage);
     XML_SetUserData(parser, &storage);
     XML_SetStartElementHandler(parser, start_element_event_handler);
 
+    g_bytesScanned = 0;
     int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
-    int scanned_bytes = 0;   // sum of (buffered bytes at each actual parse)
     int offset = 0;
     while (*fillsize >= 0) {
       assert_true(offset + *fillsize <= document_length); // or test is invalid
-      const unsigned attempts_before = g_parseAttempts;
       const enum XML_Status status
           = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
       if (status != XML_STATUS_OK) {
@@ -5796,28 +5776,20 @@ START_TEST(test_varying_buffer_fills) {
       fillsize++;
       assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
       worstcase_bytes += offset; // we might've tried to parse all pending bytes
-      if (g_parseAttempts != attempts_before) {
-        assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
-        assert_true(offset <= INT_MAX - scanned_bytes);      // avoid overflow
-        scanned_bytes += offset; // we *did* try to parse all pending bytes
-      }
     }
     assert_true(storage.count == 1); // the big token should've been parsed
-    assert_true(scanned_bytes > 0);  // test-the-test: does our counter work?
+    assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?
     if (g_reparseDeferralEnabledDefault) {
       // heuristic is enabled; some XML_Parse calls may have deferred reparsing
-      const int max_bytes_scanned = -*fillsize;
-      if (scanned_bytes > max_bytes_scanned) {
+      const unsigned max_bytes_scanned = -*fillsize;
+      if (g_bytesScanned > max_bytes_scanned) {
         fprintf(stderr,
-                "bytes scanned in parse attempts: actual=%d limit=%d \n",
-                scanned_bytes, max_bytes_scanned);
+                "bytes scanned in parse attempts: actual=%u limit=%u \n",
+                g_bytesScanned, max_bytes_scanned);
         fail("too many bytes scanned in parse attempts");
       }
-      assert_true(scanned_bytes <= worstcase_bytes);
-    } else {
-      // heuristic is disabled; every XML_Parse() will have reparsed
-      assert_true(scanned_bytes == worstcase_bytes);
     }
+    assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);
 
     XML_ParserFree(parser);
   }
@@ -6065,7 +6037,7 @@ make_basic_test_case(Suite *s) {
   tcase_add_test__ifdef_xml_dtd(tc_basic,
                                 test_pool_integrity_with_unfinished_attr);
   tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
-  tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
+  tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
   tcase_add_test(tc_basic, test_set_reparse_deferral);
   tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
   tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);