From a86ae0e760ba807680cfc339df3ade63e70da3c4 Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 20 Nov 2025 17:03:19 +0900 Subject: [PATCH 01/14] test(bound-storage): adding some test --- test/bound-storage/bound-storage.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 test/bound-storage/bound-storage.c diff --git a/test/bound-storage/bound-storage.c b/test/bound-storage/bound-storage.c new file mode 100644 index 0000000..90737e3 --- /dev/null +++ b/test/bound-storage/bound-storage.c @@ -0,0 +1,20 @@ +int main(void) +{ + char test[10]; + + // [!!] potential stack buffer overflow on variable '' (size 10) + // constant index 11 is out of bounds (0..9) + // (this is a write access) + test[11] = 'a'; + + test[9] = 'b'; // OK + + // [!!] potential stack buffer overflow on variable '' (size 10) + // constant index 18446744073709551615 is out of bounds (0..9) + // (this is a write access) + test[-1] = 'c'; + + test[11 - 2] = 'd'; // OK + + return 0; +} From 91423b4ad8c73f6210343c9d1a15d55da6a2a378 Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 20 Nov 2025 18:05:49 +0900 Subject: [PATCH 02/14] test(bound-storage): adding some test --- .../bound-storage-for-statement.c | 24 +++++++++++++++++++ .../bound-storage-if-statement.c | 11 +++++++++ 2 files changed, 35 insertions(+) create mode 100644 test/bound-storage/bound-storage-for-statement.c create mode 100644 test/bound-storage/bound-storage-if-statement.c diff --git a/test/bound-storage/bound-storage-for-statement.c b/test/bound-storage/bound-storage-for-statement.c new file mode 100644 index 0000000..a349fbb --- /dev/null +++ b/test/bound-storage/bound-storage-for-statement.c @@ -0,0 +1,24 @@ +int main(void) +{ + char test[10]; + + // [!!] potential stack buffer overflow on variable '' (size 10) + // index variable may go up to 11 (array last valid index: 9) + // (this is a write access) + for (int i = 0; i < 20; i++) { + test[i] = 'a'; + } + + // [!!] 
potential stack buffer overflow on variable '' (size 10) + // index variable may go up to 19 (array last valid index: 9) + // (this is a write access) + for (int i = 0; i != 11; ++i) + test[i] = 'a'; + + // OK + for (int i = 0; i < 10; i++) { + test[i] = 'b'; + } + + return 0; +} diff --git a/test/bound-storage/bound-storage-if-statement.c b/test/bound-storage/bound-storage-if-statement.c new file mode 100644 index 0000000..92241da --- /dev/null +++ b/test/bound-storage/bound-storage-if-statement.c @@ -0,0 +1,11 @@ +int main(void) +{ + int i = 11; + char test[10]; + + if (i > 10) { + test[11] = 'a'; + } + char test1[10]; if (i <= 10) test1[i] = 'a'; + return 0; +} From bec1f35aa46b1c1525bbac305996d7c4b60f89df Mon Sep 17 00:00:00 2001 From: Hugo Date: Wed, 26 Nov 2025 19:16:05 +0900 Subject: [PATCH 03/14] Test: adding some tests --- .../bound-storage-for-statement.c | 19 +- .../bound-storage-if-statement.c | 38 +++- test/bound-storage/bound-storage.c | 4 +- test/bound-storage/deep-alias.c | 18 ++ .../indirection-profonde-aliasing.c | 38 ++++ test/bound-storage/ranges_test.c | 208 ++++++++++++++++++ test/bound-storage/struct_array_overflow.c | 59 +++++ test/cpy-buffer/bad-usage-memcpy.c | 18 ++ test/cpy-buffer/bad-usage-memset.c | 18 ++ test/escape-stack/direct-callback.c | 8 + test/escape-stack/global-buf.c | 14 ++ test/escape-stack/global_struct.c | 12 + test/escape-stack/indirect-callback.c | 8 + test/escape-stack/out_param.c | 13 ++ test/escape-stack/return-buf.c | 13 ++ test/escape-stack/stack_escape.c | 72 ++++++ test/multiple-storage/same-storage.c | 17 ++ test/no-error/basic-main.c | 4 + test/test.cpp | 41 ++++ test/vla/deguised-constant.c | 11 + test/vla/vla-read.c | 23 ++ test/vla/vla-scanf.c | 12 + 22 files changed, 659 insertions(+), 9 deletions(-) create mode 100644 test/bound-storage/deep-alias.c create mode 100644 test/bound-storage/indirection-profonde-aliasing.c create mode 100644 test/bound-storage/ranges_test.c create mode 100644 
test/bound-storage/struct_array_overflow.c create mode 100644 test/cpy-buffer/bad-usage-memcpy.c create mode 100644 test/cpy-buffer/bad-usage-memset.c create mode 100644 test/escape-stack/direct-callback.c create mode 100644 test/escape-stack/global-buf.c create mode 100644 test/escape-stack/global_struct.c create mode 100644 test/escape-stack/indirect-callback.c create mode 100644 test/escape-stack/out_param.c create mode 100644 test/escape-stack/return-buf.c create mode 100644 test/escape-stack/stack_escape.c create mode 100644 test/multiple-storage/same-storage.c create mode 100644 test/no-error/basic-main.c create mode 100644 test/test.cpp create mode 100644 test/vla/deguised-constant.c create mode 100644 test/vla/vla-read.c create mode 100644 test/vla/vla-scanf.c diff --git a/test/bound-storage/bound-storage-for-statement.c b/test/bound-storage/bound-storage-for-statement.c index a349fbb..a4718ab 100644 --- a/test/bound-storage/bound-storage-for-statement.c +++ b/test/bound-storage/bound-storage-for-statement.c @@ -1,16 +1,17 @@ int main(void) { char test[10]; + char *ptr = test; - // [!!] potential stack buffer overflow on variable '' (size 10) - // index variable may go up to 11 (array last valid index: 9) + // [!!] potential stack buffer overflow on variable '' (size 10) + // index variable may go up to 19 (array last valid index: 9) // (this is a write access) for (int i = 0; i < 20; i++) { test[i] = 'a'; } - // [!!] potential stack buffer overflow on variable '' (size 10) - // index variable may go up to 19 (array last valid index: 9) + // [!!] potential stack buffer overflow on variable '' (size 10) + // index variable may go up to 11 (array last valid index: 9) // (this is a write access) for (int i = 0; i != 11; ++i) test[i] = 'a'; @@ -20,5 +21,15 @@ int main(void) test[i] = 'b'; } + // Same for pointer aliasing + // [!!] 
potential stack buffer overflow on variable '' (size 10) + // index variable may go up to 19 (array last valid index: 9) + // (this is a write access) + for (int i = 0; i < 20; i++) { + ptr[i] = 'a'; + } + + int n = 6; + char buf[n]; // alloca variable return 0; } diff --git a/test/bound-storage/bound-storage-if-statement.c b/test/bound-storage/bound-storage-if-statement.c index 92241da..6a7d009 100644 --- a/test/bound-storage/bound-storage-if-statement.c +++ b/test/bound-storage/bound-storage-if-statement.c @@ -1,11 +1,43 @@ +// int main(void) +// { +// int i = 1; +// char test[10]; + +// if (i > 10) { +// test[11] = 'a'; +// } +// char test1[10]; if (i <= 10) test1[i] = 'a'; +// return 0; +// } + +// [warn] multiple stores to stack buffer '' in this function (2 store instruction(s), 2 distinct index expression(s)) +// stores use different index expressions; verify indices are correct and non-overlapping int main(void) { int i = 11; char test[10]; - if (i > 10) { +// [!!] potential stack buffer overflow on variable '' (size 10) +// constant index 11 is out of bounds (0..9) +// (this is a write access) +// [info] this access appears unreachable at runtime (condition is always false for this branch) + if (i <= 10) test[11] = 'a'; - } - char test1[10]; if (i <= 10) test1[i] = 'a'; + + // [!!] potential stack buffer overflow on variable '' (size 10) + // index variable may go up to 10 (array last valid index: 9) + // (this is a write access) + // [info] this access appears unreachable at runtime (condition is always false for this branch) + char test1[10]; + if (i <= 10) + test1[i] = 'a'; + +// [!!] 
potential stack buffer overflow on variable '' (size 10) +// index variable may go up to 10 (array last valid index: 9) +// (this is a write access) + char test2[10]; + if (i > 10) + test1[i] = 'a'; + return 0; } diff --git a/test/bound-storage/bound-storage.c b/test/bound-storage/bound-storage.c index 90737e3..3d5b263 100644 --- a/test/bound-storage/bound-storage.c +++ b/test/bound-storage/bound-storage.c @@ -2,14 +2,14 @@ int main(void) { char test[10]; - // [!!] potential stack buffer overflow on variable '' (size 10) + // [!!] potential stack buffer overflow on variable '' (size 10) // constant index 11 is out of bounds (0..9) // (this is a write access) test[11] = 'a'; test[9] = 'b'; // OK - // [!!] potential stack buffer overflow on variable '' (size 10) + // [!!] potential stack buffer overflow on variable '' (size 10) // constant index 18446744073709551615 is out of bounds (0..9) // (this is a write access) test[-1] = 'c'; diff --git a/test/bound-storage/deep-alias.c b/test/bound-storage/deep-alias.c new file mode 100644 index 0000000..acb2eeb --- /dev/null +++ b/test/bound-storage/deep-alias.c @@ -0,0 +1,18 @@ +void deep_alias(char *src) +{ + char buf[10]; + char *p1 = buf; + char *p2 = p1; + char **pp = &p2; + + for (int i = 0; i < 20; ++i) { + (*pp)[i] = src[i]; + } +} + +int main(void) +{ + char src[20] = {0}; + deep_alias(src); + return 0; +} diff --git a/test/bound-storage/indirection-profonde-aliasing.c b/test/bound-storage/indirection-profonde-aliasing.c new file mode 100644 index 0000000..ca5da42 --- /dev/null +++ b/test/bound-storage/indirection-profonde-aliasing.c @@ -0,0 +1,38 @@ +int main(void) +{ + char test[10]; + char *ptr = test; + char **pp = &ptr; + (ptr)[14] = 'a'; + (*pp)[15] = 'a'; + + // [!!] potential stack buffer overflow on variable '' (size 10) + // index variable may go up to 19 (array last valid index: 9) + // (this is a write access) + for (int i = 0; i < 20; i++) { + test[i] = 'a'; + } + + // [!!] 
potential stack buffer overflow on variable '' (size 10) + // index variable may go up to 11 (array last valid index: 9) + // (this is a write access) + for (int i = 0; i != 11; ++i) + test[i] = 'a'; + + // OK + for (int i = 0; i < 10; i++) { + test[i] = 'b'; + } + + // Same for pointer aliasing + // [!!] potential stack buffer overflow on variable '' (size 10) + // index variable may go up to 19 (array last valid index: 9) + // (this is a write access) + for (int i = 0; i < 20; i++) { + ptr[i] = 'a'; + } + + int n = 6; + char buf[n]; // alloca variable + return 0; +} diff --git a/test/bound-storage/ranges_test.c b/test/bound-storage/ranges_test.c new file mode 100644 index 0000000..b6dc757 --- /dev/null +++ b/test/bound-storage/ranges_test.c @@ -0,0 +1,208 @@ +#include + +/* + * 1) Cas simples : borne sup OK / pas OK + */ + +// AUCUN WARNING attendu (UB = 9, taille = 10) +void ub_ok(int i) +{ + char buf[10]; + + if (i <= 9) + buf[i] = 'A'; +} + +// WARNING UB attendu (UB = 10, taille = 10) +void ub_overflow(int i) +{ + char buf[10]; + + if (i <= 10) + buf[i] = 'B'; +} + +/* + * 2) Borne inf négative : index potentiellement < 0 + */ + +// WARNING LB négatif attendu (i >= -3 && i < 5) +void lb_negative(int i) +{ + char buf[10]; + + if (i >= -3 && i < 5) + buf[i] = 'C'; +} + +// WARNING LB négatif + UB hors borne (i >= -3 && i <= 15) +void lb_and_ub(int i) +{ + char buf[10]; + + if (i >= -3 && i <= 15) + buf[i] = 'D'; +} + +/* + * 3) if imbriqués : affiner l’intervalle (LB & UB) + * + * if (i <= 10) { + * if (i > 5) + * buf[i] = 'E'; + * } + * + * Ici, on sait que 6 <= i <= 10 + * avec buf[8] → UB hors borne + */ + +// ATTENDU : UB hors borne (taille 8, i ∈ [6,10]) +void nested_if_overflow(int i) +{ + char buf[8]; + + if (i <= 10) { + if (i > 5) { + buf[i] = 'E'; + } + } +} + +// Variante “safe” pour comparaison (taille 16, i ∈ [6,10]) → idéalement aucun warning +void nested_if_ok(int i) +{ + char buf[16]; + + if (i <= 10) { + if (i > 5) { + buf[i] = 'F'; + } + } +} 
+ +/* + * 4) Boucles : patterns classiques de for + */ + +// AUCUN WARNING attendu (0 <= i < 10, taille 10) +void loop_ok(void) +{ + char buf[10]; + + for (int i = 0; i < 10; ++i) + buf[i] = 'G'; +} + +// WARNING UB attendu (0 <= i <= 10, taille = 10) +void loop_ub_overflow(void) +{ + char buf[10]; + + for (int i = 0; i <= 10; ++i) + buf[i] = 'H'; +} + +// WARNING LB négatif attendu (-3 <= i < 5, taille = 10) +void loop_lb_negative(void) +{ + char buf[10]; + + for (int i = -3; i < 5; ++i) + buf[i] = 'I'; +} + +/* + * 5) Cas unreachable mais avec accès hors borne + * (tu as déjà ce genre de logique, mais ça teste qu’on garde bien l’info) + */ + +// ATTENDU : warning overflow + [info] unreachable +void unreachable_example(void) +{ + int i = 1; + char buf[10]; + + if (i > 10) { // condition fausse à l’exécution + buf[11] = 'J'; + } +} + +/* + * 6) Aliasing de pointeur + intervalle (LB & UB) + */ + +// ATTENDU : UB + LB négatif (p = buf) +void alias_lb_ub(int i) +{ + char buf[10]; + char *p = buf; + + if (i >= -2 && i <= 12) + p[i] = 'K'; +} + +// ATTENDU : aucun warning (0 <= i < 10) +void alias_ok(int i) +{ + char buf[10]; + char *p = buf; + + if (i >= 0 && i < 10) + p[i] = 'L'; +} + +/* + * 7) Combinaison bizarre : bornes serrées, mais toujours safe + * i ∈ [2,7], buf[8] → normalement OK + */ + +void tight_range_ok(int i) +{ + char buf[8]; + + if (i >= 2 && i <= 7) + buf[i] = 'M'; +} + +/* + * 8) Cas extrême : bornes très larges + * i >= -100 && i <= 100, buf[10] → LB négatif + UB hors borne + */ + +void huge_range(int i) +{ + char buf[10]; + + if (i >= -100 && i <= 100) + buf[i] = 'N'; +} + +/* + * main : juste pour que le compilateur ne vire pas tout si optimisation + */ + +int main(void) +{ + ub_ok(5); + ub_overflow(10); + + lb_negative(-1); + lb_and_ub(20); + + nested_if_overflow(8); + nested_if_ok(8); + + loop_ok(); + loop_ub_overflow(); + loop_lb_negative(); + + unreachable_example(); + + alias_lb_ub(0); + alias_ok(5); + + tight_range_ok(3); + huge_range(0); + 
+ return 0; +} diff --git a/test/bound-storage/struct_array_overflow.c b/test/bound-storage/struct_array_overflow.c new file mode 100644 index 0000000..291da2d --- /dev/null +++ b/test/bound-storage/struct_array_overflow.c @@ -0,0 +1,59 @@ +#include + +struct S { + char buf[10]; + int x; +}; + +void ok_direct(void) +{ + struct S s; + for (int i = 0; i < 10; ++i) + s.buf[i] = 'A'; // OK +} + +// Function: overflow_eq_10 (line 19, column 18) +// [!!] potential stack buffer overflow on variable 's' (size 10) +// alias path: s -> buf +// index variable may go up to 10 (array last valid index: 9) +// (this is a write access) +void overflow_eq_10(void) +{ + struct S s; + for (int i = 0; i <= 10; ++i) + s.buf[i] = 'B'; // i == 10 -> overflow +} + +// Function: overflow_const_index (line 25, column 15) +// [!!] potential stack buffer overflow on variable 's' (size 10) +// alias path: s -> buf +// constant index 11 is out of bounds (0..9) +// (this is a write access) +void overflow_const_index(void) +{ + struct S s; + s.buf[11] = 'C'; // overflow constant +} + +// Function: nested_if_overflow (line 34, column 18) +// [!!] potential stack buffer overflow on variable 's' (size 10) +// alias path: s -> buf +// index variable may go up to 15 (array last valid index: 9) +// (this is a write access) +void nested_if_overflow(void) +{ + struct S s; + int i = 15; + + if (i > 5 && i <= 15) // ton analyse de bornes devrait voir UB = 15 + s.buf[i] = 'D'; // overflow +} + +int main(void) +{ + ok_direct(); + overflow_eq_10(); + overflow_const_index(); + nested_if_overflow(); + return 0; +} diff --git a/test/cpy-buffer/bad-usage-memcpy.c b/test/cpy-buffer/bad-usage-memcpy.c new file mode 100644 index 0000000..fb3b533 --- /dev/null +++ b/test/cpy-buffer/bad-usage-memcpy.c @@ -0,0 +1,18 @@ +#include + +// Function: foo +// [!!] 
potential stack buffer overflow in memcpy on variable '' +// destination stack buffer size: 10 bytes +// requested 20 bytes to be copied/initialized +void foo(char *src) +{ + char buf[10]; + memcpy(buf, src, 20); +} + +int main(void) +{ + char src[20] = {0}; + foo(src); + return 0; +} diff --git a/test/cpy-buffer/bad-usage-memset.c b/test/cpy-buffer/bad-usage-memset.c new file mode 100644 index 0000000..31e07ff --- /dev/null +++ b/test/cpy-buffer/bad-usage-memset.c @@ -0,0 +1,18 @@ +#include + +// Function: foo +// [!!] potential stack buffer overflow in memset on variable '' +// destination stack buffer size: 10 bytes +// requested 100 bytes to be copied/initialized +void foo(char *src) +{ + char buf[10]; + memset(buf, 0, 100); +} + +int main(void) +{ + char src[20] = {0}; + foo(src); + return 0; +} diff --git a/test/escape-stack/direct-callback.c b/test/escape-stack/direct-callback.c new file mode 100644 index 0000000..01ed683 --- /dev/null +++ b/test/escape-stack/direct-callback.c @@ -0,0 +1,8 @@ +// case_call_arg.c +void sink(char *p); + +void pass_to_sink(void) +{ + char buf[10]; + sink(buf); // le callee peut capturer le pointeur +} diff --git a/test/escape-stack/global-buf.c b/test/escape-stack/global-buf.c new file mode 100644 index 0000000..6e1e260 --- /dev/null +++ b/test/escape-stack/global-buf.c @@ -0,0 +1,14 @@ +// tests/stack_escape_global.c +static char *g; + +void set_global(void) +{ + char buf[10]; + g = buf; // warning attendu: store_global +} + +int main(void) +{ + set_global(); + return 0; +} diff --git a/test/escape-stack/global_struct.c b/test/escape-stack/global_struct.c new file mode 100644 index 0000000..0584897 --- /dev/null +++ b/test/escape-stack/global_struct.c @@ -0,0 +1,12 @@ +// case_global_struct.c +struct Holder { + char *p; +}; + +struct Holder G; + +void store_in_global_field(void) +{ + char buf[10]; + G.p = buf; // fuite : G est global +} diff --git a/test/escape-stack/indirect-callback.c b/test/escape-stack/indirect-callback.c 
new file mode 100644 index 0000000..dce17d8 --- /dev/null +++ b/test/escape-stack/indirect-callback.c @@ -0,0 +1,8 @@ +// case_callback.c +typedef void (*cb_t)(char *); + +void use_callback(cb_t cb) +{ + char buf[10]; + cb(buf); // fuite potentielle via callback +} diff --git a/test/escape-stack/out_param.c b/test/escape-stack/out_param.c new file mode 100644 index 0000000..76c9f87 --- /dev/null +++ b/test/escape-stack/out_param.c @@ -0,0 +1,13 @@ +// case_out_param.c +void leak_out_param(char **out) +{ + char buf[10]; + *out = buf; // fuite via paramètre de sortie +} + +// case_out_param_safe.c +void safe_out_param(char **out) +{ + char *local = 0; // pointeur, mais pas de stack buffer derrière + *out = local; // pas une adresse de variable de stack +} diff --git a/test/escape-stack/return-buf.c b/test/escape-stack/return-buf.c new file mode 100644 index 0000000..8bf0ad1 --- /dev/null +++ b/test/escape-stack/return-buf.c @@ -0,0 +1,13 @@ +// tests/stack_escape_return.c +char *ret_buf(void) +{ + char buf[10]; + return buf; // warning attendu: return +} + +int main(void) +{ + char *p = ret_buf(); + (void)p; + return 0; +} diff --git a/test/escape-stack/stack_escape.c b/test/escape-stack/stack_escape.c new file mode 100644 index 0000000..d3da7b5 --- /dev/null +++ b/test/escape-stack/stack_escape.c @@ -0,0 +1,72 @@ +// tests/stack_escape.c +char *g_ptr; +struct Holder { + char *p; +}; +struct Holder G; + +typedef void (*cb_t)(char *); + +char *ret_buf(void) +{ + char buf[10]; + return buf; +} + +void store_global(void) +{ + char buf[10]; + g_ptr = buf; +} + +void store_in_global_field(void) +{ + char buf[10]; + G.p = buf; +} + +void leak_out_param(char **out) +{ + char buf[10]; + *out = buf; +} + +void safe_out_param(char **out) +{ + char *local = 0; + *out = local; +} + +void use_callback(cb_t cb) +{ + char buf[10]; + cb(buf); +} + +void sink(char *p); + +void pass_to_sink(void) +{ + char buf[10]; + sink(buf); +} + +void local_alias_only(void) +{ + char buf[10]; + 
char *p = buf; + char **pp = &p; + (void)pp; +} + +int main(void) +{ + char *p; + leak_out_param(&p); + use_callback((cb_t)0); + pass_to_sink(); + local_alias_only(); + store_global(); + store_in_global_field(); + return 0; +} diff --git a/test/multiple-storage/same-storage.c b/test/multiple-storage/same-storage.c new file mode 100644 index 0000000..a560832 --- /dev/null +++ b/test/multiple-storage/same-storage.c @@ -0,0 +1,17 @@ +#include + +void foo(void) +{ + char buf[10]; + buf[0] = 'a'; + buf[1] = 'b'; + buf[2] = 'c'; + buf[2] = 'c'; +} + +int main(void) +{ + char src[20] = {0}; + foo(); + return 0; +} diff --git a/test/no-error/basic-main.c b/test/no-error/basic-main.c new file mode 100644 index 0000000..58fe692 --- /dev/null +++ b/test/no-error/basic-main.c @@ -0,0 +1,4 @@ +int main(void) +{ + return 0; +} diff --git a/test/test.cpp b/test/test.cpp new file mode 100644 index 0000000..b03b96b --- /dev/null +++ b/test/test.cpp @@ -0,0 +1,41 @@ +#include +#include + +void toto(void) +{ + char test[100]; + int a = 5; + int b = 10; + int sum = a + b; + test[0] = '\0'; // Initialize the string + snprintf(test, sizeof(test), "Hello, World! %d %d %d\n", a, b, sum); + printf("%s", test); + std::cout << test; +} + +void tutu(void) +{ + static int counter = 0; + counter++; + if (counter == 5) + return; + tutu(); +} + +int main(void) +{ + int a = 5; + int b = 10; + int sum = a + b; + const bool is_ok = false; + + if (is_ok) { + char test[100]; + test[0] = '\0'; // Initialize the string + snprintf(test, sizeof(test), "Hello, World! 
%d %d %d\n", a, b, sum); + } + + tutu(); + + return sum; +} diff --git a/test/vla/deguised-constant.c b/test/vla/deguised-constant.c new file mode 100644 index 0000000..029dde2 --- /dev/null +++ b/test/vla/deguised-constant.c @@ -0,0 +1,11 @@ +void foo(void) +{ + int n = 6; + char buf[n]; // techniquement VLA, mais bornée et triviale +} + +int main(int ac, char **av) +{ + foo(); + return 0; +} diff --git a/test/vla/vla-read.c b/test/vla/vla-read.c new file mode 100644 index 0000000..ac1e815 --- /dev/null +++ b/test/vla/vla-read.c @@ -0,0 +1,23 @@ +#include +#include + +int main(void) +{ + char tmp[1024]; + + ssize_t n = read(STDIN_FILENO, tmp, sizeof(tmp)); + if (n <= 0) + return 1; + + // char *buf = malloc(n); + int len = (int)n; + char buf[len]; + if (!buf) + return 1; + + for (ssize_t i = 0; i < n; ++i) + buf[i] = tmp[i]; + + free(buf); + return 0; +} diff --git a/test/vla/vla-scanf.c b/test/vla/vla-scanf.c new file mode 100644 index 0000000..6b9c7fb --- /dev/null +++ b/test/vla/vla-scanf.c @@ -0,0 +1,12 @@ +#include + +int main(void) +{ + int n; + if (scanf("%d", &n) != 1) + return 1; + + char buf[n]; // VLA aussi + + return 0; +} From 37204365f4f7235c413d559e9867a34b788ee164 Mon Sep 17 00:00:00 2001 From: Hugo Date: Wed, 26 Nov 2025 19:17:40 +0900 Subject: [PATCH 04/14] docs(readme): add extern-project sample illustrating FetchContent-based integration --- extern-project/CMakeLists.txt | 23 +++++++++++++++++++++++ extern-project/src/main.cpp | 29 +++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 extern-project/CMakeLists.txt create mode 100644 extern-project/src/main.cpp diff --git a/extern-project/CMakeLists.txt b/extern-project/CMakeLists.txt new file mode 100644 index 0000000..cd5ad94 --- /dev/null +++ b/extern-project/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_minimum_required(VERSION 3.16) +project(consumer_example CXX) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +include(FetchContent) + 
+FetchContent_Declare( + stack_analyzer + GIT_REPOSITORY https://github.com/CoreTrace/coretrace-stack-analyzer.git + GIT_TAG feature/stack-buffer-bounds-check +# GIT_TAG main +) + +FetchContent_MakeAvailable(stack_analyzer) + +add_executable(sa_consumer src/main.cpp) + +target_link_libraries(sa_consumer + PRIVATE + coretrace::stack_usage_analyzer_static +) diff --git a/extern-project/src/main.cpp b/extern-project/src/main.cpp new file mode 100644 index 0000000..595b887 --- /dev/null +++ b/extern-project/src/main.cpp @@ -0,0 +1,29 @@ +#include "StackUsageAnalyzer.hpp" +#include +#include +#include + +int main(int argc, char **argv) +{ + if (argc < 2) + { + std::cerr << "usage: sa_consumer \n"; + return 1; + } + + std::string filename = argv[1]; + + llvm::LLVMContext ctx; + llvm::SMDiagnostic diag; + + ctrace::stack::AnalysisConfig cfg; + cfg.mode = ctrace::stack::AnalysisMode::IR; + cfg.stackLimit = 8 * 1024 * 1024; + + auto res = ctrace::stack::analyzeFile(filename, cfg, ctx, diag); + + // Exemple : output SARIF sur stdout + std::cout << ctrace::stack::toSarif(res, filename) << std::endl; + + return 0; +} From 45658e428a3cedcf612c7d57eedf161a0ca2f482 Mon Sep 17 00:00:00 2001 From: Hugo Date: Wed, 26 Nov 2025 19:20:46 +0900 Subject: [PATCH 05/14] Feat: adding mangling for CPP symbols and some new stuff --- include/StackUsageAnalyzer.hpp | 37 ++++++++++++++++- include/mangle.hpp | 75 ++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 include/mangle.hpp diff --git a/include/StackUsageAnalyzer.hpp b/include/StackUsageAnalyzer.hpp index adca279..323cfa6 100644 --- a/include/StackUsageAnalyzer.hpp +++ b/include/StackUsageAnalyzer.hpp @@ -27,8 +27,10 @@ enum class AnalysisMode // Configuration de l'analyse (mode + limite de stack) struct AnalysisConfig { - AnalysisMode mode = AnalysisMode::IR; + AnalysisMode mode = AnalysisMode::IR; StackSize stackLimit = 8ull * 1024ull * 1024ull; // 8 MiB par défaut + bool quiet = 
false; + bool warningsOnly = false; }; // Résultat par fonction @@ -43,13 +45,46 @@ struct FunctionResult bool exceedsLimit = false; // maxStack > config.stackLimit }; +enum class DiagnosticSeverity +{ + Info = 0, + Warning = 1, + Error = 2 +}; + +struct Diagnostic +{ + std::string funcName; + unsigned line = 0; + unsigned column = 0; + DiagnosticSeverity severity = DiagnosticSeverity::Warning; + std::string message; +}; + // Résultat global pour un module struct AnalysisResult { AnalysisConfig config; std::vector functions; + // Human-readable diagnostics (buffer overflows, VLAs, memcpy issues, escapes, etc.) + // All messages are formatted and then printed in main(). + // std::vector diagnostics; + std::vector diagnostics; }; +// Serialize an AnalysisResult to a simple JSON format (pour CI / GitHub Actions). +// `inputFile` : chemin du fichier analysé (celui que tu passes à analyzeFile). +std::string toJson(const AnalysisResult &result, + const std::string &inputFile); + +// Serialize an AnalysisResult to SARIF 2.1.0 (compatible GitHub Code Scanning). +// `inputFile` : chemin du fichier analysé. +// `toolName` / `toolVersion` : metadata du tool dans le SARIF. +std::string toSarif(const AnalysisResult &result, + const std::string &inputFile, + const std::string &toolName = "coretrace-stack-analyzer", + const std::string &toolVersion = "0.1.0"); + // Analyse un module déjà chargé (tu peux réutiliser dans d'autres outils) AnalysisResult analyzeModule(llvm::Module &mod, const AnalysisConfig &config); diff --git a/include/mangle.hpp b/include/mangle.hpp new file mode 100644 index 0000000..93eb6ac --- /dev/null +++ b/include/mangle.hpp @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include +#include + +namespace ctrace_tools +{ + /** + * @brief Concept to define types that can be converted to `std::string_view`. + * + * The `StringLike` concept ensures that any type passed to functions + * requiring it can be implicitly converted to `std::string_view`. 
+ */ + template <typename T> + concept StringLike = std::convertible_to<T, std::string_view>; + // TODO: add mangling for windows + + /** + * @brief Checks if a given name is a mangled C++ symbol. + * + * This function determines whether a given name follows the Itanium C++ ABI + * mangling conventions (e.g., names starting with `_Z`). + * + * @tparam T A type satisfying the `StringLike` concept. + * @param name The name to check for mangling. + * @return `true` if the name is mangled, `false` otherwise. + * + * @note This function uses `abi::__cxa_demangle` to attempt demangling. + * If the demangling succeeds, the name is considered mangled. + * @note This implementation is specific to platforms using the Itanium C++ ABI + * (e.g., Linux, macOS). Windows mangling is not yet supported. + * @note The function is marked `[[nodiscard]]`, meaning the return value + * should not be ignored. It is also `noexcept`, indicating that it + * does not throw exceptions. + */ + [[nodiscard]] bool isMangled(StringLike auto name) noexcept + { + int status = 0; + std::string_view sv{name}; + + if (sv.length() < 2 || sv.substr(0, 2) != "_Z") + { + return false; + } + + std::unique_ptr<char, void (*)(void *)> demangled( + abi::__cxa_demangle(std::string(sv).c_str(), nullptr, nullptr, &status), /* NUL-terminated copy: string_view::data() need not be terminated */ + std::free + ); + return status == 0; + } + + /** + * @brief Generates a mangled name for a function. + * + * This function creates a mangled name for a function based on its namespace, + * name, and parameter types. The mangling follows the Itanium C++ ABI conventions. + * + * @param namespaceName The namespace of the function. + * @param functionName The name of the function. + * @param paramTypes A vector of strings representing the parameter types. + * @return A `std::string` containing the mangled name. + * + * @note The implementation of this function is not provided in the current file. 
+ */ + [[nodiscard]] std::string mangleFunction( + const std::string& namespaceName, + const std::string& functionName, + const std::vector& paramTypes + ); + + [[nodiscard]] std::string demangle(const char *name); +}; From 8cff6e771b760c1db7ff69d77b6dd623c75c2079 Mon Sep 17 00:00:00 2001 From: Hugo Date: Wed, 26 Nov 2025 19:21:20 +0900 Subject: [PATCH 06/14] Feat: adding mangling for CPP symbols and some new stuff --- main.cpp | 92 +- src/StackUsageAnalyzer.cpp | 1721 +++++++++++++++++++++++++++++++++++- src/mangle.cpp | 82 ++ 3 files changed, 1882 insertions(+), 13 deletions(-) create mode 100644 src/mangle.cpp diff --git a/main.cpp b/main.cpp index 9f32007..6316851 100644 --- a/main.cpp +++ b/main.cpp @@ -5,24 +5,63 @@ #include #include #include +#include "mangle.hpp" using namespace ctrace::stack; +enum class OutputFormat +{ + Human, + Json, + Sarif +}; + int main(int argc, char **argv) { llvm::LLVMContext context; llvm::SMDiagnostic err; const char *inputFilename = nullptr; + OutputFormat outputFormat = OutputFormat::Human; + AnalysisConfig cfg; // mode = IR, stackLimit = 8MiB par défaut + cfg.quiet = false; + cfg.warningsOnly = false; + // cfg.mode = AnalysisMode::IR; -> already set by default constructor + // cfg.stackLimit = 8ull * 1024ull * 1024ull; // 8 MiB -> already set by default constructor but needed to be set with args - for (int i = 1; i < argc; ++i) { + for (int i = 1; i < argc; ++i) + { const char *arg = argv[i]; - if (std::strncmp(arg, "--mode=", 7) == 0) { + std::string argStr{arg}; + if (argStr == "--quiet") + { + cfg.quiet = true; + continue; + } + if (argStr == "--warnings-only") + { + cfg.warningsOnly = true; + continue; + } + if (argStr == "--format=json") + { + outputFormat = OutputFormat::Json; + continue; + } + else if (argStr == "--format=sarif") + { + outputFormat = OutputFormat::Sarif; + continue; + } + if (std::strncmp(arg, "--mode=", 7) == 0) + { const char *modeStr = arg + 7; - if (std::strcmp(modeStr, "ir") == 0) { + if 
(std::strcmp(modeStr, "ir") == 0) + { cfg.mode = AnalysisMode::IR; - } else if (std::strcmp(modeStr, "abi") == 0) { + } else if (std::strcmp(modeStr, "abi") == 0) + { cfg.mode = AnalysisMode::ABI; } else { llvm::errs() << "Unknown mode: " << modeStr @@ -48,12 +87,29 @@ int main(int argc, char **argv) return 1; } + if (outputFormat == OutputFormat::Json) + { + llvm::outs() << ctrace::stack::toJson(result, inputFilename); + return 0; + } + + if (outputFormat == OutputFormat::Sarif) + { + llvm::outs() << ctrace::stack::toSarif(result, inputFilename, + "coretrace-stack-analyzer", "0.1.0"); + return 0; + } + llvm::outs() << "Mode: " << (result.config.mode == AnalysisMode::IR ? "IR" : "ABI") << "\n\n"; for (const auto &f : result.functions) { - llvm::outs() << "Function: " << f.name << "\n"; + std::vector param_types; + // param_types.reserve(issue.inst->getFunction()->arg_size()); + param_types.push_back("void"); // dummy to avoid empty vector issue // refaire avec les paramèters réels + + llvm::outs() << "Function: " << f.name << " " << ((ctrace_tools::isMangled(f.name)) ? 
ctrace_tools::demangle(f.name.c_str()) : "") << "\n"; llvm::outs() << " local stack: " << f.localStack << " bytes\n"; llvm::outs() << " max stack (including callees): " << f.maxStack << " bytes\n"; @@ -71,8 +127,34 @@ int main(int argc, char **argv) << result.config.stackLimit << " bytes\n"; } + if (!result.config.quiet) { + for (const auto &d : result.diagnostics) + { + if (d.funcName != f.name) + continue; + + // Si warningsOnly est actif, on ignore les diagnostics Info + if (result.config.warningsOnly && + d.severity == DiagnosticSeverity::Info) { + continue; + } + + if (d.line != 0) + { + llvm::outs() << " at line " << d.line + << ", column " << d.column << "\n"; + } + llvm::outs() << d.message << "\n"; + } + } + llvm::outs() << "\n"; } + // // Print all diagnostics collected during analysis + // for (const auto &msg : result.diagnostics) { + // llvm::outs() << msg << "\n"; + // } + return 0; } diff --git a/src/StackUsageAnalyzer.cpp b/src/StackUsageAnalyzer.cpp index 985ad02..5ba41ac 100644 --- a/src/StackUsageAnalyzer.cpp +++ b/src/StackUsageAnalyzer.cpp @@ -7,6 +7,11 @@ #include #include #include +#include +#include // std::snprintf + +#include +#include #include #include @@ -15,8 +20,12 @@ #include #include #include +#include +#include #include +#include #include +#include #include #include #include @@ -42,10 +51,901 @@ struct InternalAnalysisState { std::set InfiniteRecursionFuncs; // auto-récursion “infinie” }; +// Rapport interne pour les dépassements de buffer sur la stack +struct StackBufferOverflow { + std::string funcName; + std::string varName; + StackSize arraySize = 0; + StackSize indexOrUpperBound = 0; // utilisé pour les bornes sup (UB) ou index constant + bool isWrite = false; + bool indexIsConstant = false; + const llvm::Instruction *inst = nullptr; + + // Nouveau : violation basée sur une borne inférieure (index potentiellement négatif) + bool isLowerBoundViolation = false; + long long lowerBound = 0; // borne inférieure déduite (signée) + 
std::string aliasPath; // ex: "pp -> ptr -> buf" +}; + +// Intervalle d'entier pour une valeur : borne inférieure / supérieure (signées) +struct IntRange { + bool hasLower = false; + long long lower = 0; + bool hasUpper = false; + long long upper = 0; +}; + +// Rapport interne pour les allocations dynamiques sur la stack (VLA / alloca variable) +struct DynamicAllocaIssue { + std::string funcName; + std::string varName; + std::string typeName; + const llvm::AllocaInst *allocaInst = nullptr; +}; + +// Rapport interne pour les usages dangereux de memcpy/memset sur la stack +struct MemIntrinsicIssue { + std::string funcName; + std::string varName; + std::string intrinsicName; // "memcpy" / "memset" / "memmove" + StackSize destSizeBytes = 0; + StackSize lengthBytes = 0; + const llvm::Instruction *inst = nullptr; +}; + +// Rapport interne pour plusieurs stores dans un même buffer de stack +struct MultipleStoreIssue { + std::string funcName; + std::string varName; + std::size_t storeCount = 0; // nombre total de StoreInst vers ce buffer + std::size_t distinctIndexCount = 0; // nombre d'expressions d'index distinctes + const llvm::AllocaInst *allocaInst = nullptr; +}; + +// Rapport interne pour les fuites de pointeurs vers la stack +struct StackPointerEscapeIssue { + std::string funcName; + std::string varName; + std::string escapeKind; // "return", "store_global", "store_unknown", "call_arg", "call_callback" + std::string targetName; // nom du global, si applicable + const llvm::Instruction *inst = nullptr; +}; +// Analyse intra-fonction pour détecter les "fuites" de pointeurs de stack : +// - retour d'une adresse de variable locale (return buf;) +// - stockage de l'adresse d'une variable locale dans un global (global = buf;) +// +// Heuristique : pour chaque AllocaInst, on remonte son graphe d'utilisation +// en suivant les bitcast, GEP, PHI, select de type pointeur, et on marque +// comme "escape" : +// - tout return qui renvoie une valeur dérivée de cette alloca +// - 
+//   any store that writes this value into a GlobalVariable.
+//
+// For every alloca in F this walks the pointer values derived from it and
+// appends one StackPointerEscapeIssue to `out` per escaping use:
+//   - "return"        : a derived pointer is returned;
+//   - "store_global"  : a derived pointer is stored into a GlobalVariable;
+//   - "store_unknown" : a derived pointer is stored through a destination that
+//                       is neither a GlobalVariable nor an alloca;
+//   - "call_arg"      : a derived pointer is passed to a direct call;
+//   - "call_callback" : a derived pointer is passed to an indirect call.
+// Limitation: loads are not followed, so a pointer that is stored into a local
+// slot and then reloaded is not tracked beyond the slot itself.
+static void analyzeStackPointerEscapesInFunction(
+    llvm::Function &F,
+    std::vector<StackPointerEscapeIssue> &out)
+{
+    using namespace llvm;
+
+    if (F.isDeclaration())
+        return;
+
+    const std::string funcName = F.getName().str();
+
+    for (BasicBlock &BB : F) {
+        for (Instruction &I : BB) {
+            auto *AI = dyn_cast<AllocaInst>(&I);
+            if (!AI)
+                continue;
+
+            const std::string varName =
+                AI->hasName() ? AI->getName().str() : std::string("<unnamed>");
+
+            // Records one escape of AI happening at instruction `at`.
+            auto report = [&](const char *kind, std::string target, const Instruction *at) {
+                StackPointerEscapeIssue issue;
+                issue.funcName = funcName;
+                issue.varName = varName;
+                issue.escapeKind = kind;
+                issue.targetName = std::move(target);
+                issue.inst = at;
+                out.push_back(std::move(issue));
+            };
+
+            // Worklist over AI and every value treated as an alias of it.
+            SmallPtrSet<const Value *, 16> visited;
+            SmallVector<const Value *, 8> worklist;
+            worklist.push_back(AI);
+
+            while (!worklist.empty()) {
+                const Value *V = worklist.pop_back_val();
+                if (!visited.insert(V).second)
+                    continue; // already explored
+
+                for (const Use &U : V->uses()) {
+                    const User *Usr = U.getUser();
+
+                    // 1) The pointer (or an alias) is returned.
+                    if (auto *RI = dyn_cast<ReturnInst>(Usr)) {
+                        report("return", std::string{}, RI);
+                        continue;
+                    }
+
+                    // 2) The pointer is the value being stored somewhere.
+                    if (auto *SI = dyn_cast<StoreInst>(Usr)) {
+                        // A store *into* V does not leak V's address.
+                        if (SI->getValueOperand() != V)
+                            continue;
+
+                        const Value *dst = SI->getPointerOperand()->stripPointerCasts();
+
+                        if (auto *GV = dyn_cast<GlobalVariable>(dst)) {
+                            // 2.a) Stored into a global: obvious escape.
+                            report("store_global",
+                                   GV->hasName() ? GV->getName().str() : std::string{},
+                                   SI);
+                        } else if (auto *dstAI = dyn_cast<AllocaInst>(dst)) {
+                            // 2.c) Stored into a local slot: not an escape by
+                            // itself, keep exploring through that slot.
+                            worklist.push_back(dstAI);
+                        } else {
+                            // 2.b) Stored through a non-local pointer (e.g.
+                            // *out = buf;): the lifetime of the destination is
+                            // unknown, so treat it as a potential escape.
+                            report("store_unknown",
+                                   dst->hasName() ? dst->getName().str() : std::string{},
+                                   SI);
+                        }
+                        continue;
+                    }
+
+                    // 3) The pointer is passed to a call: cb(buf); or f(buf);
+                    if (auto *CB = dyn_cast<CallBase>(Usr)) {
+                        // Report exactly once per argument use. The previous
+                        // version rescanned every argument for each use, so a
+                        // pointer passed k times produced k*k reports.
+                        if (CB->isArgOperand(&U)) {
+                            const Value *called = CB->getCalledOperand();
+                            const Function *directCallee =
+                                called ? dyn_cast<Function>(called->stripPointerCasts())
+                                       : nullptr;
+
+                            if (directCallee) {
+                                // Direct call: the callee's semantics are
+                                // unknown, flag as a potential escape.
+                                report("call_arg",
+                                       directCallee->hasName()
+                                           ? directCallee->getName().str()
+                                           : std::string{},
+                                       CB);
+                            } else {
+                                // Indirect call through a function pointer.
+                                report("call_callback", std::string{}, CB);
+                            }
+                        }
+                        // Aliases are not propagated through the call.
+                        continue;
+                    }
+
+                    // 4) Pointer alias propagation: GEPs always, bitcast / PHI /
+                    // select only when they produce a pointer.
+                    const bool pointerForwarder =
+                        isa<BitCastInst>(Usr) || isa<PHINode>(Usr) || isa<SelectInst>(Usr);
+                    if (isa<GetElementPtrInst>(Usr) ||
+                        (pointerForwarder && Usr->getType()->isPointerTy())) {
+                        worklist.push_back(Usr);
+                        continue;
+                    }
+
+                    // Any other use (load, comparison, ...) is neither an
+                    // escape nor an alias we follow.
+                }
+            }
+        }
+    }
+}
+
+// --------------------------------------------------------------------------
+// Helpers for inspecting allocas and index bounds
+// --------------------------------------------------------------------------
+
+// Number of elements of a stack array alloca.
+// Multiplies the outermost array dimension of the allocated type (if it is an
+// array) by the constant array-size operand of the alloca (if any). Only the
+// outermost array dimension is taken into account.
+// Returns std::nullopt when the alloca's array size is not a constant.
+static std::optional<StackSize> getAllocaElementCount(llvm::AllocaInst *AI)
+{
+    using namespace llvm;
+
+    Type *elemTy = AI->getAllocatedType();
+    StackSize count = 1;
+
+    // "char test[10];" => alloca [10 x i8]
+    if (auto *arrTy = dyn_cast<ArrayType>(elemTy)) {
+        count *= arrTy->getNumElements();
+        elemTy = arrTy->getElementType();
+    }
+
+    // "alloca i8, i64 10" => array allocation with an explicit element count
+    if (AI->isArrayAllocation()) {
+        auto *C = dyn_cast<ConstantInt>(AI->getArraySize());
+        if (!C)
+            return std::nullopt; // non-constant size: not handled here
+        count *= C->getZExtValue();
+    }
+
+    return count;
+}
+
+// Total size in bytes of a stack alloca.
+// Returns std::nullopt when the size depends on a non-constant value (VLA).
+static std::optional<StackSize>
+getAllocaTotalSizeBytes(const llvm::AllocaInst *AI, const llvm::DataLayout &DL)
+{
+    using namespace llvm;
+
+    Type *allocatedTy = AI->getAllocatedType();
+
+    // alloca [N x T]: the whole size is carried by the allocated type.
+    if (!AI->isArrayAllocation()) {
+        return DL.getTypeAllocSize(allocatedTy);
+    }
+
+    // alloca T, i64 <n>: constant element count passed as a separate operand.
+    if (auto *C = dyn_cast<ConstantInt>(AI->getArraySize())) {
+        uint64_t count = C->getZExtValue();
+        uint64_t elemSize = DL.getTypeAllocSize(allocatedTy);
+        return count * elemSize;
+    }
+
+    // Dynamic size: left to the DynamicAllocaIssue analysis.
+    return std::nullopt;
+}
+
+// Derives integer intervals (signed lower / upper bounds) from the ICmp
+// instructions of F that compare a value against a ConstantInt.
+//
+// The result maps a value to the tightest bounds seen in any such comparison.
+// When the compared value is a load, the same bounds are also recorded for the
+// loaded-from pointer. The analysis is flow-insensitive: it does not check
+// which branch of the comparison an access is on.
+//
+// Heuristic: "V != C" is recorded as "V <= C".
+static std::map<const llvm::Value *, IntRange>
+computeIntRangesFromICmps(llvm::Function &F)
+{
+    using namespace llvm;
+
+    std::map<const Value *, IntRange> ranges;
+
+    // Tightens the range of V: keeps the largest lower bound and the smallest
+    // upper bound seen so far.
+    auto applyConstraint = [&ranges](const Value *V,
+                                     bool hasLB, long long newLB,
+                                     bool hasUB, long long newUB) {
+        auto &R = ranges[V];
+        if (hasLB && (!R.hasLower || newLB > R.lower)) {
+            R.hasLower = true;
+            R.lower = newLB;
+        }
+        if (hasUB && (!R.hasUpper || newUB < R.upper)) {
+            R.hasUpper = true;
+            R.upper = newUB;
+        }
+    };
+
+    for (BasicBlock &BB : F) {
+        for (Instruction &I : BB) {
+            auto *icmp = dyn_cast<ICmpInst>(&I);
+            if (!icmp)
+                continue;
+
+            Value *op0 = icmp->getOperand(0);
+            Value *op1 = icmp->getOperand(1);
+            auto pred = icmp->getPredicate();
+
+            // Look for "V ? C" or "C ? V"; the latter is normalised to
+            // "V ?' C" by swapping the predicate so one table handles both.
+            ConstantInt *C = nullptr;
+            Value *V = nullptr;
+            if ((C = dyn_cast<ConstantInt>(op1)) && !isa<ConstantInt>(op0)) {
+                V = op0;
+            } else if ((C = dyn_cast<ConstantInt>(op0)) && !isa<ConstantInt>(op1)) {
+                V = op1;
+                pred = icmp->getSwappedPredicate();
+            } else {
+                continue;
+            }
+
+            // getSExtValue()/getZExtValue() assert on constants that do not
+            // fit in 64 bits.
+            if (C->getBitWidth() > 64)
+                continue;
+
+            const bool unsignedPred = ICmpInst::isUnsigned(pred);
+            const bool is64 = C->getBitWidth() == 64;
+
+            // An unsigned 64-bit constant above LLONG_MAX cannot be stored in
+            // the signed bounds; reinterpreting it would yield bogus negative
+            // bounds, so such comparisons carry no usable information here.
+            if (unsignedPred && is64 && C->isNegative())
+                continue;
+
+            // EQ / NE are treated with the signed interpretation.
+            const long long c = unsignedPred
+                                    ? static_cast<long long>(C->getZExtValue())
+                                    : static_cast<long long>(C->getSExtValue());
+
+            // Guards against signed overflow when computing c + 1 / c - 1.
+            const bool cIsMax = is64 && C->isMaxValue(/*IsSigned=*/true);
+            const bool cIsMin = is64 && C->isMinValue(/*IsSigned=*/true);
+
+            bool hasLB = false, hasUB = false;
+            long long lb = 0, ub = 0;
+
+            switch (pred) {
+            case ICmpInst::ICMP_SLT:
+            case ICmpInst::ICMP_ULT: // V < C  => V <= C-1
+                if (!cIsMin) {
+                    hasUB = true;
+                    ub = c - 1;
+                }
+                break;
+            case ICmpInst::ICMP_SLE:
+            case ICmpInst::ICMP_ULE: // V <= C
+                hasUB = true;
+                ub = c;
+                break;
+            case ICmpInst::ICMP_SGT:
+            case ICmpInst::ICMP_UGT: // V > C  => V >= C+1
+                if (!cIsMax) {
+                    hasLB = true;
+                    lb = c + 1;
+                }
+                break;
+            case ICmpInst::ICMP_SGE:
+            case ICmpInst::ICMP_UGE: // V >= C
+                hasLB = true;
+                lb = c;
+                break;
+            case ICmpInst::ICMP_EQ: // V == C => [C, C]
+                hasLB = true;
+                lb = c;
+                hasUB = true;
+                ub = c;
+                break;
+            case ICmpInst::ICMP_NE: // heuristic: V != C treated as V <= C
+                hasUB = true;
+                ub = c;
+                break;
+            default:
+                break;
+            }
+
+            if (!(hasLB || hasUB))
+                continue;
+
+            // Constraint on V itself...
+            applyConstraint(V, hasLB, lb, hasUB, ub);
+
+            // ...and on the loaded-from pointer when V is a load, so that a
+            // later load of the same variable can be matched.
+            if (auto *LI = dyn_cast<LoadInst>(V)) {
+                const Value *ptr = LI->getPointerOperand();
+                applyConstraint(ptr, hasLB, lb, hasUB, ub);
+            }
+        }
+    }
+
+    return ranges;
+}
+
+// Forward declaration: tries to find a constant behind a Value.
+static const llvm::ConstantInt* tryGetConstFromValue(const llvm::Value *V,
+                                                     const llvm::Function &F);
+
+// Intra-function analysis that detects dynamic stack allocations
+// (for example: int n = read(); char buf[n];).
+// Appends one DynamicAllocaIssue to `out` for every alloca whose array-size
+// operand is neither a ConstantInt nor resolvable by tryGetConstFromValue.
+static void analyzeDynamicAllocasInFunction(
+    llvm::Function &F,
+    std::vector<DynamicAllocaIssue> &out)
+{
+    using namespace llvm;
+
+    if (F.isDeclaration())
+        return;
+
+    for (BasicBlock &BB : F) {
+        for (Instruction &I : BB) {
+            auto *AI = dyn_cast<AllocaInst>(&I);
+            if (!AI)
+                continue;
+
+            Value *arraySizeVal = AI->getArraySize();
+
+            // 1) Size is an immediate constant in the IR: not a VLA.
+            if (isa<ConstantInt>(arraySizeVal))
+                continue;
+
+            // 2) Heuristic: the size can be traced back to a constant stored
+            //    in a local variable, e.g.
+            //        int n = 6;
+            //        char buf[n];
+            //    Treated as effectively constant to avoid noisy VLA warnings.
+            if (tryGetConstFromValue(arraySizeVal, F) != nullptr)
+                continue;
+
+            // 3) Otherwise this is a genuine VLA / variable-size alloca.
+            DynamicAllocaIssue issue;
+            issue.funcName = F.getName().str();
+            issue.varName = AI->hasName() ? AI->getName().str() : std::string("<unnamed>");
+
+            // Textual form of the allocated element type.
+            std::string tyStr;
+            llvm::raw_string_ostream rso(tyStr);
+            AI->getAllocatedType()->print(rso);
+            issue.typeName = rso.str();
+
+            issue.allocaInst = AI;
+            out.push_back(std::move(issue));
+        }
+    }
+}
+
+// Forward declaration: resolves a stack array alloca from a pointer value.
+static const llvm::AllocaInst* resolveArrayAllocaFromPointer(const llvm::Value *V,
+                                                             llvm::Function &F,
+                                                             std::vector<std::string> &path);
+
+// Intra-function analysis that detects potentially out-of-bounds accesses
+// on buffers allocated on the stack (alloca).
+static void analyzeStackBufferOverflowsInFunction(
+    llvm::Function &F,
+    std::vector<StackBufferOverflow> &out)
+{
+    // For every GEP whose base resolves to a stack array alloca, appends one
+    // StackBufferOverflow to `out` per load/store through that GEP when:
+    //   - the index is a constant outside [0, arraySize), or
+    //   - the index has a known upper bound >= arraySize, or
+    //   - the index has a known negative lower bound.
+    // Bounds come from computeIntRangesFromICmps, which is flow-insensitive.
+    using namespace llvm;
+
+    if (F.isDeclaration())
+        return;
+
+    auto ranges = computeIntRangesFromICmps(F);
+    const std::string funcName = F.getName().str();
+
+    for (BasicBlock &BB : F) {
+        for (Instruction &I : BB) {
+            auto *GEP = dyn_cast<GetElementPtrInst>(&I);
+            if (!GEP)
+                continue;
+
+            // 1) Find the base of the pointer (test, &test[0], ptr, ...).
+            std::vector<std::string> aliasPath;
+            const AllocaInst *AI =
+                resolveArrayAllocaFromPointer(GEP->getPointerOperand(), F, aliasPath);
+            if (!AI)
+                continue;
+
+            // 2) Determine the logical array size and the index operand.
+            //    First from the type traversed by the GEP (covers
+            //    struct S { char buf[10]; }; s.buf[i]), then fall back to the
+            //    alloca's element count (char buf[10]; ptr = buf; ptr[i]).
+            StackSize arraySize = 0;
+            Value *idxVal = nullptr;
+
+            Type *srcElemTy = GEP->getSourceElementType();
+
+            if (auto *arrTy = dyn_cast<ArrayType>(srcElemTy)) {
+                // alloca [N x T]; GEP indices [0, i]
+                if (GEP->getNumIndices() < 2)
+                    continue;
+                auto idxIt = GEP->idx_begin();
+                ++idxIt; // skip the first index (usually 0)
+                idxVal = idxIt->get();
+                arraySize = arrTy->getNumElements();
+            } else if (auto *ST = dyn_cast<StructType>(srcElemTy)) {
+                // Struct with an array field; expected indices [0, field, i].
+                if (GEP->getNumIndices() >= 3) {
+                    auto idxIt = GEP->idx_begin();
+                    auto *idx0 = dyn_cast<ConstantInt>(idxIt->get());
+                    ++idxIt;
+                    auto *fieldIdxC = dyn_cast<ConstantInt>(idxIt->get());
+                    ++idxIt;
+
+                    if (idx0 && fieldIdxC) {
+                        unsigned fieldIdx =
+                            static_cast<unsigned>(fieldIdxC->getZExtValue());
+                        if (fieldIdx < ST->getNumElements()) {
+                            Type *fieldTy = ST->getElementType(fieldIdx);
+                            if (auto *fieldArrTy = dyn_cast<ArrayType>(fieldTy)) {
+                                arraySize = fieldArrTy->getNumElements();
+                                // Third index = index inside the inner array.
+                                idxVal = idxIt->get();
+                            }
+                        }
+                    }
+                }
+            }
+
+            if (arraySize == 0 || !idxVal) {
+                auto maybeCount = getAllocaElementCount(const_cast<AllocaInst *>(AI));
+                if (!maybeCount)
+                    continue;
+                arraySize = *maybeCount;
+                if (arraySize == 0)
+                    continue;
+
+                // In this case the first index is taken as the logical index.
+                if (GEP->getNumIndices() < 1)
+                    continue;
+                idxVal = GEP->idx_begin()->get();
+            }
+
+            const std::string varName =
+                AI->hasName() ? AI->getName().str() : std::string("<unnamed>");
+
+            // 3) Alias chain, outermost alias first. Built once per GEP: the
+            //    previous code reversed aliasPath in place for every report,
+            //    so every other report printed the chain backwards.
+            std::string aliasChain;
+            for (auto it = aliasPath.rbegin(); it != aliasPath.rend(); ++it) {
+                if (!aliasChain.empty())
+                    aliasChain += " -> ";
+                aliasChain += *it;
+            }
+
+            // Emits one report per memory access performed through this GEP.
+            auto emitReports = [&](bool indexIsConstant, StackSize indexOrUpperBound,
+                                   bool isLowerBoundViolation, long long lowerBound) {
+                for (const User *GU : GEP->users()) {
+                    bool isWrite = false;
+                    if (auto *S = dyn_cast<StoreInst>(GU)) {
+                        // Only a store *through* the GEP is an access; storing
+                        // the computed address elsewhere is not.
+                        if (S->getPointerOperand() != GEP)
+                            continue;
+                        isWrite = true;
+                    } else if (!isa<LoadInst>(GU)) {
+                        continue;
+                    }
+
+                    StackBufferOverflow report;
+                    report.funcName = funcName;
+                    report.varName = varName;
+                    report.arraySize = arraySize;
+                    report.indexOrUpperBound = indexOrUpperBound;
+                    report.isWrite = isWrite;
+                    report.indexIsConstant = indexIsConstant;
+                    report.inst = cast<Instruction>(GU);
+                    report.isLowerBoundViolation = isLowerBoundViolation;
+                    report.lowerBound = lowerBound;
+                    report.aliasPath = aliasChain;
+                    out.push_back(std::move(report));
+                }
+            };
+
+            // 4) Constant index: test[11]
+            if (auto *CIdx = dyn_cast<ConstantInt>(idxVal)) {
+                const auto idxValue = CIdx->getSExtValue();
+                // A negative constant is reported with its unsigned
+                // (wrapped) value in indexOrUpperBound.
+                if (idxValue < 0 || static_cast<StackSize>(idxValue) >= arraySize)
+                    emitReports(true, static_cast<StackSize>(idxValue), false, 0);
+                continue;
+            }
+
+            // 5) Variable index: test[i] / ptr[i]
+            //    Strip integer casts (sext/zext/...) to reach the base value.
+            Value *baseIdxVal = idxVal;
+            while (auto *CI = dyn_cast<CastInst>(baseIdxVal))
+                baseIdxVal = CI->getOperand(0);
+
+            // -O0 pattern (load i, icmp, load i, gep): the range is keyed on
+            // the loaded-from pointer rather than on the individual load.
+            const Value *key = baseIdxVal;
+            if (auto *LI = dyn_cast<LoadInst>(baseIdxVal))
+                key = LI->getPointerOperand();
+
+            auto itRange = ranges.find(key);
+            if (itRange == ranges.end())
+                continue; // no known bound: stay silent
+
+            const IntRange &R = itRange->second;
+
+            // 5.a) Upper bound reaches or exceeds the array size.
+            if (R.hasUpper && R.upper >= 0 &&
+                static_cast<StackSize>(R.upper) >= arraySize) {
+                emitReports(false, static_cast<StackSize>(R.upper), false, 0);
+            }
+
+            // 5.b) Negative lower bound: index may be negative.
+            if (R.hasLower && R.lower < 0) {
+                emitReports(false, 0, true, R.lower);
+            }
+            // Otherwise the access is considered probably safe.
+        }
+    }
+}
+
 // ============================================================================
 // Helpers
 // ============================================================================
+// Detects memcpy / memset / memmove calls whose constant length exceeds the
+// total size of the destination stack alloca, and appends one
+// MemIntrinsicIssue to `out` per such call.
+// Only direct calls with a constant length are handled, and the destination is
+// resolved through pointer casts and at most one GEP. A non-zero GEP offset is
+// not subtracted from the destination size.
+static void analyzeMemIntrinsicOverflowsInFunction(
+    llvm::Function &F,
+    const llvm::DataLayout &DL,
+    std::vector<MemIntrinsicIssue> &out)
+{
+    using namespace llvm;
+
+    if (F.isDeclaration())
+        return;
+
+    for (BasicBlock &BB : F) {
+        for (Instruction &I : BB) {
+            // Only calls (intrinsics or libc) are of interest.
+            auto *CB = dyn_cast<CallBase>(&I);
+            if (!CB)
+                continue;
+
+            Function *callee = CB->getCalledFunction();
+            if (!callee)
+                continue;
+
+            const char *intrinsicName = nullptr;
+
+            // 1) LLVM intrinsics: llvm.memcpy.*, llvm.memset.*, llvm.memmove.*
+            if (auto *II = dyn_cast<IntrinsicInst>(CB)) {
+                switch (II->getIntrinsicID()) {
+                case Intrinsic::memcpy:  intrinsicName = "memcpy";  break;
+                case Intrinsic::memset:  intrinsicName = "memset";  break;
+                case Intrinsic::memmove: intrinsicName = "memmove"; break;
+                default: break;
+                }
+            }
+
+            // 2) libc calls or similarly named symbols (substring match).
+            if (!intrinsicName) {
+                StringRef name = callee->getName();
+                if (name.contains("memcpy"))
+                    intrinsicName = "memcpy";
+                else if (name.contains("memset"))
+                    intrinsicName = "memset";
+                else if (name.contains("memmove"))
+                    intrinsicName = "memmove";
+            }
+
+            if (!intrinsicName)
+                continue;
+
+            // Expected arguments: dest, src/val, len
+            if (CB->arg_size() < 3)
+                continue;
+
+            // Heuristic resolution of the destination: strip casts and one
+            // GEP level, then require an alloca.
+            const Value *cur = CB->getArgOperand(0)->stripPointerCasts();
+            if (auto *GEP = dyn_cast<GetElementPtrInst>(cur))
+                cur = GEP->getPointerOperand();
+            const AllocaInst *AI = dyn_cast<AllocaInst>(cur);
+            if (!AI)
+                continue;
+
+            auto maybeSize = getAllocaTotalSizeBytes(AI, DL);
+            if (!maybeSize)
+                continue;
+            StackSize destBytes = *maybeSize;
+
+            // Only constant lengths are handled for now.
+            auto *lenC = dyn_cast<ConstantInt>(CB->getArgOperand(2));
+            if (!lenC)
+                continue;
+
+            uint64_t len = lenC->getZExtValue();
+            if (len <= destBytes)
+                continue; // no obvious overflow
+
+            MemIntrinsicIssue issue;
+            issue.funcName = F.getName().str();
+            issue.varName = AI->hasName() ? AI->getName().str() : std::string("<unnamed>");
+            issue.intrinsicName = intrinsicName;
+            issue.destSizeBytes = destBytes;
+            issue.lengthBytes = len;
+            issue.inst = &I;
+
+            out.push_back(std::move(issue));
+        }
+    }
+}
+
 // Appelle-t-on une autre fonction que soi-même ?
static bool hasNonSelfCall(const llvm::Function &F) { @@ -158,7 +1058,8 @@ static StackSize computeLocalStack(llvm::Function &F, const llvm::DataLayout &DL, AnalysisMode mode) { - switch (mode) { + switch (mode) + { case AnalysisMode::IR: return computeLocalStackIR(F, DL); case AnalysisMode::ABI: @@ -175,24 +1076,31 @@ static CallGraph buildCallGraph(llvm::Module &M) { CallGraph CG; - for (llvm::Function &F : M) { + for (llvm::Function &F : M) + { if (F.isDeclaration()) continue; auto &vec = CG[&F]; - for (llvm::BasicBlock &BB : F) { - for (llvm::Instruction &I : BB) { + for (llvm::BasicBlock &BB : F) + { + for (llvm::Instruction &I : BB) + { const llvm::Function *Callee = nullptr; - if (auto *CI = llvm::dyn_cast(&I)) { + if (auto *CI = llvm::dyn_cast(&I)) + { Callee = CI->getCalledFunction(); - } else if (auto *II = llvm::dyn_cast(&I)) { + } + else if (auto *II = llvm::dyn_cast(&I)) + { Callee = II->getCalledFunction(); } - if (Callee && !Callee->isDeclaration()) { + if (Callee && !Callee->isDeclaration()) + { vec.push_back(Callee); } } @@ -211,7 +1119,8 @@ static StackSize dfsComputeStack( const CallGraph &CG, const std::map &LocalStack, std::map &State, - InternalAnalysisState &Res) + InternalAnalysisState &Res +) { auto itState = State.find(F); if (itState != State.end()) { @@ -335,6 +1244,279 @@ static bool detectInfiniteSelfRecursion(llvm::Function &F) return true; } +// HELPERS +// Essaie de retrouver une alloca de tableau à partir d'un pointeur, +// en suivant les bitcast, GEP(0,0), et un pattern simple de pointeur local : +// char test[10]; +// char *ptr = test; +// ... load ptr ... 
; gep -> ptr[i] +static const llvm::AllocaInst* +resolveArrayAllocaFromPointer(const llvm::Value *V, llvm::Function &F, std::vector &path) +{ + using namespace llvm; + + auto isArrayAlloca = [](const AllocaInst *AI) -> bool { + Type *T = AI->getAllocatedType(); + // On considère comme "buffer de stack" : + // - les vrais tableaux, + // - les allocas de type tableau (VLA côté IR), + // - les structs qui contiennent au moins un champ tableau. + if (T->isArrayTy() || AI->isArrayAllocation()) + return true; + + if (auto *ST = llvm::dyn_cast(T)) { + for (unsigned i = 0; i < ST->getNumElements(); ++i) { + if (ST->getElementType(i)->isArrayTy()) + return true; + } + } + return false; + }; + + // Pour éviter les boucles d'aliasing bizarres + SmallPtrSet visited; + const Value *cur = V; + + while (cur && !visited.contains(cur)) { + visited.insert(cur); + if (cur->hasName()) + path.push_back(cur->getName().str()); + + // Cas 1 : on tombe sur une alloca. + if (auto *AI = dyn_cast(cur)) { + if (isArrayAlloca(AI)) { + // Alloca d'un buffer de stack (tableau) : cible finale. + return AI; + } + + // Sinon, c'est très probablement une variable locale de type pointeur + // (char *ptr; char **pp; etc.). On parcourt les stores vers cette + // variable pour voir quelles valeurs lui sont assignées, et on + // tente de remonter jusqu'à une vraie alloca de tableau. 
+ const AllocaInst *foundAI = nullptr; + + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + auto *SI = dyn_cast(&I); + if (!SI) + continue; + if (SI->getPointerOperand() != AI) + continue; + + const Value *storedPtr = SI->getValueOperand(); + std::vector subPath; + const AllocaInst *cand = + resolveArrayAllocaFromPointer(storedPtr, F, subPath); + if (!cand) + continue; + + if (!foundAI) { + foundAI = cand; + // Append subPath to path + path.insert(path.end(), subPath.begin(), subPath.end()); + } else if (foundAI != cand) { + // Plusieurs bases différentes : aliasing ambigu, + // on préfère abandonner plutôt que de se tromper. + return nullptr; + } + } + } + return foundAI; + } + + // Cas 2 : bitcast -> on remonte l'opérande. + if (auto *BC = dyn_cast(cur)) { + cur = BC->getOperand(0); + continue; + } + + // Cas 3 : GEP -> on remonte sur le pointeur de base. + if (auto *GEP = dyn_cast(cur)) { + cur = GEP->getPointerOperand(); + continue; + } + + // Cas 4 : load d'un pointeur. Exemple typique : + // char *ptr = test; + // char *p2 = ptr; + // char **pp = &ptr; + // (*pp)[i] = ... + // + // On remonte au "container" du pointeur (variable locale, ou autre valeur) + // en suivant l'opérande du load. + if (auto *LI = dyn_cast(cur)) { + cur = LI->getPointerOperand(); + continue; + } + + // Cas 5 : PHI de pointeurs (fusion de plusieurs alias) : + // on tente de résoudre chaque incoming et on s'assure qu'ils + // pointent tous vers la même alloca de tableau. + if (auto *PN = dyn_cast(cur)) { + const AllocaInst *foundAI = nullptr; + std::vector phiPath; + for (unsigned i = 0; i < PN->getNumIncomingValues(); ++i) { + const Value *inV = PN->getIncomingValue(i); + std::vector subPath; + const AllocaInst *cand = + resolveArrayAllocaFromPointer(inV, F, subPath); + if (!cand) + continue; + if (!foundAI) { + foundAI = cand; + phiPath = subPath; + } else if (foundAI != cand) { + // PHI mélange plusieurs bases différentes : trop ambigu. 
+ return nullptr; + } + } + path.insert(path.end(), phiPath.begin(), phiPath.end()); + return foundAI; + } + + // Autres cas (arguments, globales complexes, etc.) : on arrête l'heuristique. + break; + } + + return nullptr; +} + +// Analyse intra-fonction pour détecter plusieurs stores dans un même buffer de stack. +// Heuristique : on compte le nombre de StoreInst qui écrivent dans un GEP basé sur +// une alloca de tableau sur la stack. Si une même alloca reçoit plus d'un store, +// on émet un warning. +static void analyzeMultipleStoresInFunction( + llvm::Function &F, + std::vector &out) +{ + using namespace llvm; + + if (F.isDeclaration()) + return; + + struct Info { + std::size_t storeCount = 0; + llvm::SmallPtrSet indexKeys; + const AllocaInst *AI = nullptr; + }; + + std::map infoMap; + + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + auto *S = dyn_cast(&I); + if (!S) + continue; + + Value *ptr = S->getPointerOperand(); + auto *GEP = dyn_cast(ptr); + if (!GEP) + continue; + + // On remonte à la base pour trouver une alloca de tableau sur la stack. + const Value *basePtr = GEP->getPointerOperand(); + std::vector dummyAliasPath; + const AllocaInst *AI = resolveArrayAllocaFromPointer(basePtr, F, dummyAliasPath); + if (!AI) + continue; + + // On récupère l'expression d'index utilisée dans le GEP. + Value *idxVal = nullptr; + Type *srcElemTy = GEP->getSourceElementType(); + + if (auto *arrTy = dyn_cast(srcElemTy)) { + // Pattern [N x T]* -> indices [0, i] + if (GEP->getNumIndices() < 2) + continue; + auto idxIt = GEP->idx_begin(); + ++idxIt; // saute le premier indice (souvent 0) + idxVal = idxIt->get(); + } else { + // Pattern T* -> indice unique [i] (cas char *ptr = test; ptr[i]) + if (GEP->getNumIndices() < 1) + continue; + auto idxIt = GEP->idx_begin(); + idxVal = idxIt->get(); + } + + if (!idxVal) + continue; + + // On normalise un peu la clé d'index en enlevant les casts SSA. 
+ const Value *idxKey = idxVal; + while (auto *cast = dyn_cast(const_cast(idxKey))) { + idxKey = cast->getOperand(0); + } + + auto &info = infoMap[AI]; + info.AI = AI; + info.storeCount++; + info.indexKeys.insert(idxKey); + } + } + + // Construction des warnings pour chaque buffer qui reçoit plusieurs stores. + for (auto &entry : infoMap) { + const AllocaInst *AI = entry.first; + const Info &info = entry.second; + + if (info.storeCount <= 1) + continue; // un seul store -> pas de warning + + MultipleStoreIssue issue; + issue.funcName = F.getName().str(); + issue.varName = AI->hasName() ? AI->getName().str() : std::string(""); + issue.storeCount = info.storeCount; + issue.distinctIndexCount = info.indexKeys.size(); + issue.allocaInst = AI; + + out.push_back(std::move(issue)); + } +} + +// HELPERS +static const llvm::ConstantInt* tryGetConstFromValue(const llvm::Value *V, + const llvm::Function &F) +{ + using namespace llvm; + + // On enlève d'abord les cast (sext/zext/trunc, etc.) pour arriver + // à la vraie valeur “de base”. + const Value *cur = V; + while (auto *cast = dyn_cast(cur)) + { + cur = cast->getOperand(0); + } + + // Cas trivial : c'est déjà une constante entière. + if (auto *C = dyn_cast(cur)) + return C; + + // Cas -O0 typique : on compare un load d'une variable locale. + auto *LI = dyn_cast(cur); + if (!LI) + return nullptr; + + const Value *ptr = LI->getPointerOperand(); + const ConstantInt *found = nullptr; + + // Version ultra-simple : on cherche un store de constante dans la fonction. + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) { + auto *SI = dyn_cast(&I); + if (!SI) + continue; + if (SI->getPointerOperand() != ptr) + continue; + if (auto *C = dyn_cast(SI->getValueOperand())) { + // On garde la dernière constante trouvée (si plusieurs stores, c'est naïf). 
+ found = C; + } + } + } + return found; +} + // ============================================================================ // API publique : analyzeModule / analyzeFile // ============================================================================ @@ -404,6 +1586,374 @@ AnalysisResult analyzeModule(llvm::Module &mod, result.functions.push_back(std::move(fr)); } + // 6) Détection des dépassements de buffer sur la stack (analyse intra-fonction) + std::vector bufferIssues; + for (llvm::Function &F : mod) { + if (F.isDeclaration()) + continue; + analyzeStackBufferOverflowsInFunction(F, bufferIssues); + } + + // 7) Affichage des problèmes détectés (pour l'instant, sortie directe) + for (const auto &issue : bufferIssues) + { + unsigned line = 0; + unsigned column = 0; + bool haveLoc = false; + + if (issue.inst) { + llvm::DebugLoc DL = issue.inst->getDebugLoc(); + if (DL) + { + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; + } + } + + bool isUnreachable = false; + { + using namespace llvm; + + if (issue.inst) { + auto *BB = issue.inst->getParent(); + + // Parcourt les prédécesseurs du bloc pour voir si certains + // ont une branche conditionnelle avec une condition constante. + for (auto *Pred : predecessors(BB)) { + auto *BI = dyn_cast(Pred->getTerminator()); + if (!BI || !BI->isConditional()) + continue; + + auto *CI = dyn_cast(BI->getCondition()); + if (!CI) + continue; + + const llvm::Function &Func = *issue.inst->getFunction(); + + auto *C0 = tryGetConstFromValue(CI->getOperand(0), Func); + auto *C1 = tryGetConstFromValue(CI->getOperand(1), Func); + if (!C0 || !C1) + continue; + + // Évalue le résultat de l'ICmp pour ces constantes (implémentation maison). 
+ bool condTrue = false; + auto pred = CI->getPredicate(); + const auto &v0 = C0->getValue(); + const auto &v1 = C1->getValue(); + + switch (pred) { + case ICmpInst::ICMP_EQ: + condTrue = (v0 == v1); + break; + case ICmpInst::ICMP_NE: + condTrue = (v0 != v1); + break; + case ICmpInst::ICMP_SLT: + condTrue = v0.slt(v1); + break; + case ICmpInst::ICMP_SLE: + condTrue = v0.sle(v1); + break; + case ICmpInst::ICMP_SGT: + condTrue = v0.sgt(v1); + break; + case ICmpInst::ICMP_SGE: + condTrue = v0.sge(v1); + break; + case ICmpInst::ICMP_ULT: + condTrue = v0.ult(v1); + break; + case ICmpInst::ICMP_ULE: + condTrue = v0.ule(v1); + break; + case ICmpInst::ICMP_UGT: + condTrue = v0.ugt(v1); + break; + case ICmpInst::ICMP_UGE: + condTrue = v0.uge(v1); + break; + default: + // On ne traite pas d'autres prédicats exotiques ici + continue; + } + + // Branchement du type: + // br i1 %cond, label %then, label %else + // Successeur 0 pris si condTrue == true + // Successeur 1 pris si condTrue == false + if (BB == BI->getSuccessor(0) && condTrue == false) { + // Le bloc "then" n'est jamais atteint. + isUnreachable = true; + } else if (BB == BI->getSuccessor(1) && condTrue == true) { + // Le bloc "else" n'est jamais atteint. + isUnreachable = true; + } + } + } + } + + std::ostringstream body; + + if (issue.isLowerBoundViolation) { + body << " [!!] potential negative index on variable '" + << issue.varName << "' (size " << issue.arraySize << ")\n"; + if (!issue.aliasPath.empty()) { + body << " alias path: " << issue.aliasPath << "\n"; + } + body << " inferred lower bound for index expression: " + << issue.lowerBound << " (index may be < 0)\n"; + } else { + body << " [!!] potential stack buffer overflow on variable '" + << issue.varName << "' (size " << issue.arraySize << ")\n"; + if (!issue.aliasPath.empty()) { + body << " alias path: " << issue.aliasPath << "\n"; + } + if (issue.indexIsConstant) { + body << " constant index " << issue.indexOrUpperBound + << " is out of bounds (0.." 
+ << (issue.arraySize ? issue.arraySize - 1 : 0) + << ")\n"; + } else { + body << " index variable may go up to " + << issue.indexOrUpperBound + << " (array last valid index: " + << (issue.arraySize ? issue.arraySize - 1 : 0) << ")\n"; + } + } + + if (issue.isWrite) + { + body << " (this is a write access)\n"; + } + else + { + body << " (this is a read access)\n"; + } + if (isUnreachable) + { + body << " [info] this access appears unreachable at runtime " + "(condition is always false for this branch)\n"; + } + Diagnostic diag; + diag.funcName = issue.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? column : 0; + diag.severity = DiagnosticSeverity::Warning; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); + } + + // 8) Détection des allocations dynamiques sur la stack (VLA / alloca variable) + std::vector dynAllocaIssues; + for (llvm::Function &F : mod) { + if (F.isDeclaration()) + continue; + analyzeDynamicAllocasInFunction(F, dynAllocaIssues); + } + + // 9) Affichage des allocations dynamiques détectées + for (const auto &d : dynAllocaIssues) + { + unsigned line = 0; + unsigned column = 0; + bool haveLoc = false; + if (d.allocaInst) { + llvm::DebugLoc DL = d.allocaInst->getDebugLoc(); + if (DL) { + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; + } + } + + std::ostringstream body; + + body << " [!] dynamic stack allocation detected for variable '" + << d.varName << "'\n"; + body << " allocated type: " << d.typeName << "\n"; + body << " size of this allocation is not compile-time constant " + "(VLA / variable alloca) and may lead to unbounded stack usage\n"; + + Diagnostic diag; + diag.funcName = d.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? 
column : 0; + diag.severity = DiagnosticSeverity::Warning; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); + } + + // 10) Détection des débordements via memcpy/memset sur des buffers de stack + std::vector memIssues; + for (llvm::Function &F : mod) { + if (F.isDeclaration()) + continue; + analyzeMemIntrinsicOverflowsInFunction(F, DL, memIssues); + } + + for (const auto &m : memIssues) + { + unsigned line = 0; + unsigned column = 0; + bool haveLoc = false; + if (m.inst) { + llvm::DebugLoc DL = m.inst->getDebugLoc(); + if (DL) { + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; + } + } + + std::ostringstream body; + + body << "Function: " << m.funcName; + if (haveLoc) { + body << " (line " << line << ", column " << column << ")"; + } + body << "\n"; + + body << " [!!] potential stack buffer overflow in " + << m.intrinsicName << " on variable '" + << m.varName << "'\n"; + body << " destination stack buffer size: " + << m.destSizeBytes << " bytes\n"; + body << " requested " << m.lengthBytes + << " bytes to be copied/initialized\n"; + + Diagnostic diag; + diag.funcName = m.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? 
column : 0; + diag.severity = DiagnosticSeverity::Warning; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); + } + + // 11) Détection de plusieurs stores dans un même buffer de stack + std::vector multiStoreIssues; + for (llvm::Function &F : mod) { + if (F.isDeclaration()) + continue; + analyzeMultipleStoresInFunction(F, multiStoreIssues); + } + + for (const auto &ms : multiStoreIssues) + { + unsigned line = 0; + unsigned column = 0; + bool haveLoc = false; + if (ms.allocaInst) { + llvm::DebugLoc DL = ms.allocaInst->getDebugLoc(); + if (DL) { + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; + } + } + + std::ostringstream body; + + body << " [!Info] multiple stores to stack buffer '" + << ms.varName << "' in this function (" + << ms.storeCount << " store instruction(s)"; + if (ms.distinctIndexCount > 0) + { + body << ", " << ms.distinctIndexCount + << " distinct index expression(s)"; + } + body << ")\n"; + + if (ms.distinctIndexCount == 1) + { + body << " all stores use the same index expression " + "(possible redundant or unintended overwrite)\n"; + } + else if (ms.distinctIndexCount > 1) + { + body << " stores use different index expressions; " + "verify indices are correct and non-overlapping\n"; + } + + Diagnostic diag; + diag.funcName = ms.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? 
column : 0; + diag.severity = DiagnosticSeverity::Info; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); + } + + // 12) Détection de fuite de pointeurs de stack (use-after-return potentiel) + std::vector escapeIssues; + for (llvm::Function &F : mod) { + if (F.isDeclaration()) + continue; + analyzeStackPointerEscapesInFunction(F, escapeIssues); + } + + for (const auto &e : escapeIssues) + { + unsigned line = 0; + unsigned column = 0; + bool haveLoc = false; + if (e.inst) { + llvm::DebugLoc DL = e.inst->getDebugLoc(); + if (DL) { + line = DL.getLine(); + column = DL.getCol(); + haveLoc = true; + } + } + + std::ostringstream body; + + body << " [!!] stack pointer escape: address of variable '" + << e.varName << "' escapes this function\n"; + + if (e.escapeKind == "return") { + body << " escape via return statement " + "(pointer to stack returned to caller)\n"; + } else if (e.escapeKind == "store_global") { + if (!e.targetName.empty()) { + body << " stored into global variable '" + << e.targetName + << "' (pointer may be used after the function returns)\n"; + } else { + body << " stored into a global variable " + "(pointer may be used after the function returns)\n"; + } + } else if (e.escapeKind == "store_unknown") { + body << " stored through a non-local pointer " + "(e.g. 
via an out-parameter; pointer may outlive this function)\n"; + if (!e.targetName.empty()) { + body << " destination pointer/value name: '" + << e.targetName << "'\n"; + } + } else if (e.escapeKind == "call_callback") { + body << " address passed as argument to an indirect call " + "(callback may capture the pointer beyond this function)\n"; + } else if (e.escapeKind == "call_arg") { + if (!e.targetName.empty()) { + body << " address passed as argument to function '" + << e.targetName + << "' (callee may capture the pointer beyond this function)\n"; + } else { + body << " address passed as argument to a function " + "(callee may capture the pointer beyond this function)\n"; + } + } + + Diagnostic diag; + diag.funcName = e.funcName; + diag.line = haveLoc ? line : 0; + diag.column = haveLoc ? column : 0; + diag.severity = DiagnosticSeverity::Warning; + diag.message = body.str(); + result.diagnostics.push_back(std::move(diag)); + } + return result; } @@ -481,6 +2031,8 @@ AnalysisResult analyzeFile(const std::string &filename, std::vector args; args.push_back("-emit-llvm"); args.push_back("-S"); + args.push_back("-g"); + args.push_back("-fno-discard-value-names"); args.push_back(filename); compilerlib::OutputMode mode = compilerlib::OutputMode::ToMemory; auto res = compilerlib::compile(args, mode); @@ -524,4 +2076,157 @@ AnalysisResult analyzeFile(const std::string &filename, return analyzeModule(*mod, config); } +// --------------------------------------------------------------------------- +// JSON / SARIF serialization helpers +// --------------------------------------------------------------------------- + +namespace { + +// Petit helper pour échapper les chaînes JSON. 
+static std::string jsonEscape(const std::string &s) +{ + std::string out; + out.reserve(s.size() + 16); + for (char c : s) { + switch (c) { + case '\\': out += "\\\\"; break; + case '\"': out += "\\\""; break; + case '\n': out += "\\n"; break; + case '\r': out += "\\r"; break; + case '\t': out += "\\t"; break; + default: + if (static_cast(c) < 0x20) { + char buf[7]; + std::snprintf(buf, sizeof(buf), "\\u%04x", c & 0xFF); + out += buf; + } else { + out += c; + } + break; + } + } + return out; +} + +static const char *severityToJsonString(DiagnosticSeverity sev) +{ + switch (sev) { + case DiagnosticSeverity::Info: return "info"; + case DiagnosticSeverity::Warning: return "warning"; + case DiagnosticSeverity::Error: return "error"; + } + return "info"; +} + +static const char *severityToSarifLevel(DiagnosticSeverity sev) +{ + // SARIF levels: "none", "note", "warning", "error" + switch (sev) { + case DiagnosticSeverity::Info: return "note"; + case DiagnosticSeverity::Warning: return "warning"; + case DiagnosticSeverity::Error: return "error"; + } + return "note"; +} + +} // anonymous namespace + +std::string toJson(const AnalysisResult &result, + const std::string &inputFile) +{ + std::ostringstream os; + os << "{\n"; + os << " \"inputFile\": \"" << jsonEscape(inputFile) << "\",\n"; + os << " \"mode\": \"" << (result.config.mode == AnalysisMode::IR ? "IR" : "ABI") << "\",\n"; + os << " \"stackLimit\": " << result.config.stackLimit << ",\n"; + + // Fonctions + os << " \"functions\": [\n"; + for (std::size_t i = 0; i < result.functions.size(); ++i) { + const auto &f = result.functions[i]; + os << " {\n"; + os << " \"name\": \"" << jsonEscape(f.name) << "\",\n"; + os << " \"localStack\": " << f.localStack << ",\n"; + os << " \"maxStack\": " << f.maxStack << ",\n"; + os << " \"isRecursive\": " << (f.isRecursive ? "true" : "false") << ",\n"; + os << " \"hasInfiniteSelfRecursion\": " << (f.hasInfiniteSelfRecursion ? 
"true" : "false") << ",\n"; + os << " \"exceedsLimit\": " << (f.exceedsLimit ? "true" : "false") << "\n"; + os << " }"; + if (i + 1 < result.functions.size()) + os << ","; + os << "\n"; + } + os << " ],\n"; + + // Diagnostics + os << " \"diagnostics\": [\n"; + for (std::size_t i = 0; i < result.diagnostics.size(); ++i) { + const auto &d = result.diagnostics[i]; + os << " {\n"; + os << " \"function\": \"" << jsonEscape(d.funcName) << "\",\n"; + os << " \"line\": " << d.line << ",\n"; + os << " \"column\": " << d.column << ",\n"; + os << " \"severity\": \"" << severityToJsonString(d.severity) << "\",\n"; + os << " \"message\": \"" << jsonEscape(d.message) << "\"\n"; + os << " }"; + if (i + 1 < result.diagnostics.size()) + os << ","; + os << "\n"; + } + os << " ]\n"; + os << "}\n"; + return os.str(); +} + +std::string toSarif(const AnalysisResult &result, + const std::string &inputFile, + const std::string &toolName, + const std::string &toolVersion) +{ + std::ostringstream os; + os << "{\n"; + os << " \"version\": \"2.1.0\",\n"; + os << " \"$schema\": \"https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0.json\",\n"; + os << " \"runs\": [\n"; + os << " {\n"; + os << " \"tool\": {\n"; + os << " \"driver\": {\n"; + os << " \"name\": \"" << jsonEscape(toolName) << "\",\n"; + os << " \"version\": \"" << jsonEscape(toolVersion) << "\"\n"; + os << " }\n"; + os << " },\n"; + os << " \"results\": [\n"; + + for (std::size_t i = 0; i < result.diagnostics.size(); ++i) { + const auto &d = result.diagnostics[i]; + os << " {\n"; + // Pour le moment, un seul ruleId générique; tu pourras le spécialiser plus tard. 
+ os << " \"ruleId\": \"CORETRACE_STACK_DIAGNOSTIC\",\n"; + os << " \"level\": \"" << severityToSarifLevel(d.severity) << "\",\n"; + os << " \"message\": { \"text\": \"" << jsonEscape(d.message) << "\" },\n"; + os << " \"locations\": [\n"; + os << " {\n"; + os << " \"physicalLocation\": {\n"; + os << " \"artifactLocation\": { \"uri\": \"" << jsonEscape(inputFile) << "\" },\n"; + os << " \"region\": {\n"; + os << " \"startLine\": " << d.line << ",\n"; + os << " \"startColumn\": " << d.column << "\n"; + os << " }\n"; + os << " }\n"; + os << " }\n"; + os << " ]\n"; + os << " }"; + if (i + 1 < result.diagnostics.size()) + os << ","; + os << "\n"; + } + + os << " ]\n"; + os << " }\n"; + os << " ]\n"; + os << "}\n"; + + return os.str(); +} + } // namespace ctrace::stack diff --git a/src/mangle.cpp b/src/mangle.cpp new file mode 100644 index 0000000..7918bb4 --- /dev/null +++ b/src/mangle.cpp @@ -0,0 +1,82 @@ +#include "mangle.hpp" + +namespace ctrace_tools { + + std::string mangleFunction(const std::string& namespaceName, + const std::string& functionName, + const std::vector& paramTypes) + { + std::stringstream mangled; + + // Préfixe standard pour les symboles C++ dans l'Itanium ABI + mangled << "_Z"; + + // Si un namespace est présent, on utilise 'N' et on encode le nom + if (!namespaceName.empty()) + { + mangled << "N"; + mangled << namespaceName.length() << namespaceName; + } + + // Ajouter le nom de la fonction avec sa longueur + mangled << functionName.length() << functionName; + + // Encoder les types de paramètres + for (const std::string& param : paramTypes) { + if (param == "int") + { + mangled << "i"; + } + else if (param == "double") + { + mangled << "d"; + } + else if (param == "char") + { + mangled << "c"; + } + else if (param == "std::string") + { + mangled << "Ss"; // 'S' pour substitution, 's' pour std::string + } + else if (param == "float") + { + mangled << "f"; + } + else if (param == "bool") + { + mangled << "b"; + } + else if (param == "void") + { 
+ mangled << "v"; + } + else { + // Pour les types complexes ou non reconnus, encoder avec longueur + nom + mangled << param.length() << param; + } + } + + // Fermer le namespace avec 'E' si utilisé + if (!namespaceName.empty()) + { + mangled << "E"; + } + + return mangled.str(); + } + + std::string demangle(const char *name) + { + int status = 0; + char* demangled = abi::__cxa_demangle(name, nullptr, nullptr, &status); + + std::string result = (status == 0 && demangled) + ? demangled + : name; + + free(demangled); + + return result; + } +}; From abf21b4312ecd9f6f1ef76651841a12e9a594357 Mon Sep 17 00:00:00 2001 From: Hugo Date: Wed, 26 Nov 2025 19:22:35 +0900 Subject: [PATCH 07/14] Feat(build): Might be used in another project as a library --- CMakeLists.txt | 83 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 48f4424..2c8db8f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,18 @@ FetchContent_MakeAvailable(cc) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") +# Options de build +option(BUILD_CLI "Build stack_usage_analyzer CLI tool" ON) +option(BUILD_SHARED_LIB "Build shared library variant" ON) + +# =========================== +# Sources communs +# =========================== +set(STACK_ANALYZER_SOURCES + src/StackUsageAnalyzer.cpp + src/mangle.cpp +) + include_directories(${LLVM_INCLUDE_DIRS}) add_definitions(${LLVM_DEFINITIONS}) @@ -36,33 +48,76 @@ add_definitions(${LLVM_DEFINITIONS}) # support # ) - # ===== LIBRARY ===== -# Contient ta logique d'analyse (pas de main()) add_library(stack_usage_analyzer_lib - src/StackUsageAnalyzer.cpp + ${STACK_ANALYZER_SOURCES} ) +# target_include_directories(stack_usage_analyzer_lib +# PUBLIC +# ${CMAKE_CURRENT_SOURCE_DIR}/include +# ${LLVM_INCLUDE_DIRS} +# ) + +# FOR USE WITH FETCHCONTENT target_include_directories(stack_usage_analyzer_lib PUBLIC - 
${CMAKE_CURRENT_SOURCE_DIR}/include + $ + $ ${LLVM_INCLUDE_DIRS} ) +# ALIAS FOR USE WITH FETCHCONTENT +add_library(coretrace::stack_usage_analyzer_lib ALIAS stack_usage_analyzer_lib) + +# Replace this one : target_link_libraries(stack_usage_analyzer_lib PUBLIC ${llvm_libs} cc::compilerlib_static ) +# by this one : +if(LLVM_LINK_LLVM_DYLIB) + target_link_libraries(stack_usage_analyzer_lib + PUBLIC + LLVM + cc::compilerlib_static + ) +else() + llvm_map_components_to_libnames(llvm_libs + core + irreader + support + ) + target_link_libraries(stack_usage_analyzer_lib + PUBLIC + ${llvm_libs} + cc::compilerlib_static + ) +endif() + +# # ===== CLI BINARY ===== +# add_executable(stack_usage_analyzer +# main.cpp +# ) -# ===== CLI BINARY ===== -add_executable(stack_usage_analyzer - main.cpp -) +# # target_link_libraries(stack_usage_analyzer PRIVATE ${llvm_libs}) +# target_link_libraries(stack_usage_analyzer +# PRIVATE +# stack_usage_analyzer_lib +# cc::compilerlib_static +# ) -# target_link_libraries(stack_usage_analyzer PRIVATE ${llvm_libs}) -target_link_libraries(stack_usage_analyzer - PRIVATE - stack_usage_analyzer_lib - cc::compilerlib_static -) +# ===== CLI BINARY ===== +if(BUILD_CLI) + add_executable(stack_usage_analyzer + main.cpp + ) + + target_link_libraries(stack_usage_analyzer + PRIVATE + stack_usage_analyzer_lib + # pas besoin de relinker cc::compilerlib_static ici, + # il est déjà dans la lib + ) +endif() From b5dafbe8fe82e568bcd0ea0af278cd51303f7f61 Mon Sep 17 00:00:00 2001 From: Hugo Date: Wed, 26 Nov 2025 19:27:09 +0900 Subject: [PATCH 08/14] Doc: update Readme --- README.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/README.md b/README.md index 81c20c9..ca144c2 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,11 @@ ./stack_usage_analyzer --mode=[abi/ir] test.[ll/c/cpp] ``` +``` +--quiet coupe complètement les diagnostics +--warnings-only garde seulement les diagnostics importants +```` + ### Example Given this 
code: @@ -116,3 +121,33 @@ Function: main - Define json API - Unmangling symbols +--- + +#### 9. Détection de fuite de stack pointer + +Exemples : +```c +char buf[10]; +return buf; // renvoi pointeur vers stack → use-after-return +``` + +Ou stockage : + +```c +global = buf; // leaking address of stack variable +``` + +--- + +Currently implemented: + +- 1. adding VLA : Detection of potentially dangerous dynamic alloca +- 2. Detection of memcpy/memset on stack buffers +- 3. Warning when a function performs multiple stores into the same buffer +- 4. Deeper traversal analysis: constraint propagation +- 5. Detection of deep indirection in aliasing +- 6. Detection of overflow in a struct containing an internal array +- 7. Detection of stack pointer leaks: + - store_unknown -> storing the pointer in a non-local location (typically out-parameter, heap, etc.) + - call_callback -> passing it to a callback (indirect call) + - call_arg -> passing it as an argument to a direct function, potentially capturable From 7307556081beb8e5edf549e670fc24ee8f6b8078 Mon Sep 17 00:00:00 2001 From: Hugo Date: Wed, 26 Nov 2025 19:38:55 +0900 Subject: [PATCH 09/14] fix: resolve compilation error --- CMakeLists.txt | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c8db8f..22bd234 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,15 @@ cmake_minimum_required(VERSION 3.16) project(stack_usage_analyzer) -include(${CMAKE_SOURCE_DIR}/cmake/CheckLLVMVersion.cmake) -set(LLVM_MIN_REQUIRED_VERSION "19" CACHE STRING "Minimum required LLVM version") -check_llvm_version(${LLVM_MIN_REQUIRED_VERSION}) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") +include(CheckLLVMVersion OPTIONAL) + +if(COMMAND check_llvm_version) + set(LLVM_MIN_REQUIRED_VERSION "19" CACHE STRING "Minimum required LLVM version") + check_llvm_version(${LLVM_MIN_REQUIRED_VERSION}) +else() + message(WARNING "check_llvm_version() not available, skipping
LLVM version check") +endif() set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) From 56e0e1cd754464bffa3cfadd307164595d72d056 Mon Sep 17 00:00:00 2001 From: Hugo Date: Wed, 26 Nov 2025 19:48:31 +0900 Subject: [PATCH 10/14] fix(extern-project): fix bad alias --- extern-project/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extern-project/CMakeLists.txt b/extern-project/CMakeLists.txt index cd5ad94..46b1c9b 100644 --- a/extern-project/CMakeLists.txt +++ b/extern-project/CMakeLists.txt @@ -19,5 +19,5 @@ add_executable(sa_consumer src/main.cpp) target_link_libraries(sa_consumer PRIVATE - coretrace::stack_usage_analyzer_static + coretrace::stack_usage_analyzer_lib ) From 9a14e9f9e0b804e007b35c8042848fc12484aba9 Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 27 Nov 2025 14:25:30 +0900 Subject: [PATCH 11/14] chore(cmake): rename some stuff --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 22bd234..e456805 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ endif() set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) # Optionnel mais recommandé +set(CMAKE_CXX_EXTENSIONS OFF) set(LLVM_LINK_LLVM_DYLIB ON) @@ -38,7 +38,7 @@ option(BUILD_CLI "Build stack_usage_analyzer CLI tool" ON) option(BUILD_SHARED_LIB "Build shared library variant" ON) # =========================== -# Sources communs +# Common Sources # =========================== set(STACK_ANALYZER_SOURCES src/StackUsageAnalyzer.cpp @@ -55,7 +55,7 @@ add_definitions(${LLVM_DEFINITIONS}) # ) # ===== LIBRARY ===== -add_library(stack_usage_analyzer_lib +add_library(stack_usage_analyzer_lib STATIC ${STACK_ANALYZER_SOURCES} ) From 0bad13230c20b25cc900ccb49af7bb8e756bcb6f Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 27 Nov 2025 16:01:48 +0900 Subject: [PATCH 12/14] test(test/ci): adding ci and test suite --- .github/workflows/ci.yml | 30
+++++++++ run_test.py | 135 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100755 run_test.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..4a68d17 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,30 @@ +name: CI + +on: + push: + branches: [ main, master ] + pull_request: + +jobs: + build-and-test: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install build dependencies + run: | + sudo apt-get update + sudo apt-get install -y cmake ninja-build build-essential python3 + + - name: Configure and build (CMake) + run: | + mkdir -p build + cd build + cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release + ninja stack_usage_analyzer + + - name: Run analyzer tests (Python framework) + run: | + python3 run_test.py diff --git a/run_test.py b/run_test.py new file mode 100755 index 0000000..484e0e1 --- /dev/null +++ b/run_test.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +import sys +import subprocess +from pathlib import Path + +# Chemin vers ton binaire d'analyse +ANALYZER = Path("./build/stack_usage_analyzer") # à adapter si besoin +TEST_DIR = Path("test") # dossier contenant les .c + + +def normalize(s: str) -> str: + """ + Normalise les espaces pour rendre les comparaisons plus robustes : + - supprime les espaces inutiles en début/fin de ligne + - remplace les séquences d'espaces par un seul espace + - garde les sauts de lignes + """ + lines = [] + for line in s.splitlines(): + line = line.rstrip("\n") + # "a b c" -> "a b c" + parts = line.strip().split() + lines.append(" ".join(parts)) + return "\n".join(lines).strip() + + +def extract_expectations(c_path: Path): + """ + Extrait les blocs de commentaires d'attendus dans un fichier .c. + + On cherche les commentaires qui commencent par "// at line". + On prend toutes les lignes de commentaires qui suivent.
+ """ + expectations = [] + lines = c_path.read_text().splitlines() + i = 0 + n = len(lines) + + while i < n: + raw = lines[i] + stripped = raw.lstrip() + + # Début d'un bloc d'attendu + if stripped.startswith("// at line"): + comment_block = [raw] + i += 1 + # Récupère toutes les lignes "// ..." qui suivent + while i < n and lines[i].lstrip().startswith("//"): + comment_block.append(lines[i]) + i += 1 + + # Nettoyage : retirer les "//" et les indentations + cleaned_lines = [] + for c in comment_block: + s = c.lstrip() + if s.startswith("//"): + s = s[2:] # enlève "//" + cleaned_lines.append(s.lstrip()) + + expectation_text = "\n".join(cleaned_lines) + expectations.append(expectation_text) + else: + i += 1 + + return expectations + + +def run_analyzer_on_file(c_path: Path) -> str: + """ + Lance ton analyseur sur un fichier C et récupère stdout+stderr. + """ + result = subprocess.run( + [str(ANALYZER), str(c_path)], + capture_output=True, + text=True, + ) + output = (result.stdout or "") + (result.stderr or "") + return output + + +def check_file(c_path: Path) -> bool: + """ + Vérifie qu'avec ce fichier, toutes les attentes sont présentes + dans la sortie de l'analyseur. 
+ """ + print(f"=== Testing {c_path} ===") + expectations = extract_expectations(c_path) + if not expectations: + print(" (no expectations found, skipping)\n") + return True + + analyzer_output = run_analyzer_on_file(c_path) + norm_output = normalize(analyzer_output) + + all_ok = True + for idx, exp in enumerate(expectations, start=1): + norm_exp = normalize(exp) + if norm_exp in norm_output: + print(f" ✅ expectation #{idx} FOUND") + else: + print(f" ❌ expectation #{idx} MISSING") + print("----- Expected block -----") + print(exp) + print("----- Analyzer output (normalized) -----") + # tu peux commenter cette ligne si l'output est trop gros + # print(norm_output) + print("---------------------------") + all_ok = False + + print() + return all_ok + + +def main() -> int: + c_files = sorted(TEST_DIR.glob("**/*.c")) + if not c_files: + print(f"No .c files found under {TEST_DIR}") + return + + global_ok = True + for f in c_files: + ok = check_file(f) + if not ok: + global_ok = False + + if global_ok: + print("✅ All tests passed.") + return 0 + else: + print("❌ Some tests failed.") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) From b10c5c25c85f0922642950032278f7c07baac85e Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 27 Nov 2025 16:08:11 +0900 Subject: [PATCH 13/14] fix(test/ci): adding feature/stack-buffer-bounds-check branch --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4a68d17..eb61f3a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,7 @@ name: CI on: push: - branches: [ main, master ] + branches: [ main, feature/stack-buffer-bounds-check ] pull_request: jobs: From cb7a2d8717ed26e9e5625427dbc5858a7ca4ed90 Mon Sep 17 00:00:00 2001 From: Hugo Date: Thu, 27 Nov 2025 16:36:07 +0900 Subject: [PATCH 14/14] test: integrate test suite --- .../bound-storage-for-statement.c | 23 +++++--- .../bound-storage-if-statement.c | 44 
++++++-------- test/bound-storage/bound-storage.c | 10 +++- test/bound-storage/deep-alias.c | 11 ++++ .../indirection-profonde-aliasing.c | 33 ++++++++--- test/bound-storage/ranges_test.c | 59 +++++++++++++++++++ test/bound-storage/struct_array_overflow.c | 32 +++++----- test/escape-stack/direct-callback.c | 3 + test/escape-stack/global-buf.c | 3 + test/escape-stack/global-struct.c | 14 +++++ test/escape-stack/global_struct.c | 12 ---- test/escape-stack/indirect-callback.c | 6 +- test/escape-stack/out_param.c | 5 +- test/escape-stack/return-buf.c | 4 +- test/escape-stack/stack_escape.c | 2 +- test/recursion/c/infinite-recursion.c | 3 + test/recursion/c/limited-recursion.c | 1 + test/vla/deguised-constant.c | 2 +- test/vla/vla-read.c | 10 ++++ test/vla/vla-scanf.c | 9 ++- 20 files changed, 207 insertions(+), 79 deletions(-) create mode 100644 test/escape-stack/global-struct.c delete mode 100644 test/escape-stack/global_struct.c diff --git a/test/bound-storage/bound-storage-for-statement.c b/test/bound-storage/bound-storage-for-statement.c index a4718ab..c8d0ce3 100644 --- a/test/bound-storage/bound-storage-for-statement.c +++ b/test/bound-storage/bound-storage-for-statement.c @@ -3,16 +3,20 @@ int main(void) char test[10]; char *ptr = test; - // [!!] potential stack buffer overflow on variable '' (size 10) - // index variable may go up to 19 (array last valid index: 9) - // (this is a write access) + // at line 12, column 17 + // [!!] potential stack buffer overflow on variable 'test' (size 10) + // alias path: test + // index variable may go up to 19 (array last valid index: 9) + // (this is a write access) for (int i = 0; i < 20; i++) { test[i] = 'a'; } - // [!!] potential stack buffer overflow on variable '' (size 10) - // index variable may go up to 11 (array last valid index: 9) - // (this is a write access) + // at line 21, column 17 + // [!!] 
potential stack buffer overflow on variable 'test' (size 10) + // alias path: test + // index variable may go up to 11 (array last valid index: 9) + // (this is a write access) for (int i = 0; i != 11; ++i) test[i] = 'a'; @@ -21,14 +25,17 @@ int main(void) test[i] = 'b'; } - // Same for pointer aliasing - // [!!] potential stack buffer overflow on variable '' (size 10) + // at line 34, column 16 + // [!!] potential stack buffer overflow on variable 'test' (size 10) + // alias path: test -> arraydecay -> ptr // index variable may go up to 19 (array last valid index: 9) // (this is a write access) for (int i = 0; i < 20; i++) { ptr[i] = 'a'; } + // [!Info] multiple stores to stack buffer 'test' in this function (4 store instruction(s), 4 distinct index expression(s)) + // stores use different index expressions; verify indices are correct and non-overlapping int n = 6; char buf[n]; // alloca variable return 0; diff --git a/test/bound-storage/bound-storage-if-statement.c b/test/bound-storage/bound-storage-if-statement.c index 6a7d009..2d80cbd 100644 --- a/test/bound-storage/bound-storage-if-statement.c +++ b/test/bound-storage/bound-storage-if-statement.c @@ -1,40 +1,34 @@ -// int main(void) -// { -// int i = 1; -// char test[10]; - -// if (i > 10) { -// test[11] = 'a'; -// } -// char test1[10]; if (i <= 10) test1[i] = 'a'; -// return 0; -// } - -// [warn] multiple stores to stack buffer '' in this function (2 store instruction(s), 2 distinct index expression(s)) -// stores use different index expressions; verify indices are correct and non-overlapping +// [!Info] multiple stores to stack buffer 'test1' in this function (2 store instruction(s), 2 distinct index expression(s)) +// stores use different index expressions; verify indices are correct and non-overlapping int main(void) { int i = 11; char test[10]; -// [!!] 
potential stack buffer overflow on variable '' (size 10) -// constant index 11 is out of bounds (0..9) -// (this is a write access) -// [info] this access appears unreachable at runtime (condition is always false for this branch) + // at line 15, column 18 + // [!!] potential stack buffer overflow on variable 'test' (size 10) + // alias path: test + // constant index 11 is out of bounds (0..9) + // (this is a write access) + // [info] this access appears unreachable at runtime (condition is always false for this branch) if (i <= 10) test[11] = 'a'; - // [!!] potential stack buffer overflow on variable '' (size 10) - // index variable may go up to 10 (array last valid index: 9) - // (this is a write access) - // [info] this access appears unreachable at runtime (condition is always false for this branch) + // at line 25, column 18 + // [!!] potential stack buffer overflow on variable 'test1' (size 10) + // alias path: test1 + // index variable may go up to 10 (array last valid index: 9) + // (this is a write access) + // [info] this access appears unreachable at runtime (condition is always false for this branch) char test1[10]; if (i <= 10) test1[i] = 'a'; -// [!!] potential stack buffer overflow on variable '' (size 10) -// index variable may go up to 10 (array last valid index: 9) -// (this is a write access) + // at line 34, column 18 + // [!!] 
potential stack buffer overflow on variable 'test1' (size 10) + // alias path: test1 + // index variable may go up to 10 (array last valid index: 9) + // (this is a write access) char test2[10]; if (i > 10) test1[i] = 'a'; diff --git a/test/bound-storage/bound-storage.c b/test/bound-storage/bound-storage.c index 3d5b263..6a9d1b1 100644 --- a/test/bound-storage/bound-storage.c +++ b/test/bound-storage/bound-storage.c @@ -1,15 +1,21 @@ +// [!Info] multiple stores to stack buffer 'test' in this function (4 store instruction(s), 3 distinct index expression(s)) +// stores use different index expressions; verify indices are correct and non-overlapping int main(void) { char test[10]; - // [!!] potential stack buffer overflow on variable '' (size 10) + // at line 12, column 14 + // [!!] potential stack buffer overflow on variable 'test' (size 10) + // alias path: test // constant index 11 is out of bounds (0..9) // (this is a write access) test[11] = 'a'; test[9] = 'b'; // OK - // [!!] potential stack buffer overflow on variable '' (size 10) + // at line 21, column 14 + // [!!] potential stack buffer overflow on variable 'test' (size 10) + // alias path: test // constant index 18446744073709551615 is out of bounds (0..9) // (this is a write access) test[-1] = 'c'; diff --git a/test/bound-storage/deep-alias.c b/test/bound-storage/deep-alias.c index acb2eeb..e4de467 100644 --- a/test/bound-storage/deep-alias.c +++ b/test/bound-storage/deep-alias.c @@ -5,6 +5,11 @@ void deep_alias(char *src) char *p2 = p1; char **pp = &p2; + // at line 14, column 18 + // [!!] potential stack buffer overflow on variable 'buf' (size 10) + // alias path: buf -> arraydecay -> p1 -> p2 -> pp + // index variable may go up to 19 (array last valid index: 9) + // (this is a write access) for (int i = 0; i < 20; ++i) { (*pp)[i] = src[i]; } @@ -12,7 +17,13 @@ void deep_alias(char *src) int main(void) { + // at line 23, column 10 + // [!!] 
stack pointer escape: address of variable 'src' escapes this function + // address passed as argument to function 'llvm.memset.p0.i64' (callee may capture the pointer beyond this function) char src[20] = {0}; + // at line 27, column 5 + // [!!] stack pointer escape: address of variable 'src' escapes this function + // address passed as argument to function 'deep_alias' (callee may capture the pointer beyond this function) deep_alias(src); return 0; } diff --git a/test/bound-storage/indirection-profonde-aliasing.c b/test/bound-storage/indirection-profonde-aliasing.c index ca5da42..3ad1d62 100644 --- a/test/bound-storage/indirection-profonde-aliasing.c +++ b/test/bound-storage/indirection-profonde-aliasing.c @@ -1,21 +1,37 @@ +// [!Info] multiple stores to stack buffer 'test' in this function (6 store instruction(s), 6 distinct index expression(s)) +// stores use different index expressions; verify indices are correct and non-overlapping int main(void) { char test[10]; char *ptr = test; char **pp = &ptr; + // at line 13, column 15 + // [!!] potential stack buffer overflow on variable 'test' (size 10) + // alias path: test -> arraydecay -> ptr + // constant index 14 is out of bounds (0..9) + // (this is a write access) (ptr)[14] = 'a'; + // at line 19, column 15 + // [!!] potential stack buffer overflow on variable 'test' (size 10) + // alias path: test -> arraydecay -> ptr -> pp + // constant index 15 is out of bounds (0..9) + // (this is a write access) (*pp)[15] = 'a'; - // [!!] potential stack buffer overflow on variable '' (size 10) - // index variable may go up to 19 (array last valid index: 9) - // (this is a write access) + // at line 27, column 17 + // [!!] potential stack buffer overflow on variable 'test' (size 10) + // alias path: test + // index variable may go up to 19 (array last valid index: 9) + // (this is a write access) for (int i = 0; i < 20; i++) { test[i] = 'a'; } - // [!!] 
potential stack buffer overflow on variable '' (size 10) - // index variable may go up to 11 (array last valid index: 9) - // (this is a write access) + // at line 36, column 17 + // [!!] potential stack buffer overflow on variable 'test' (size 10) + // alias path: test + // index variable may go up to 11 (array last valid index: 9) + // (this is a write access) for (int i = 0; i != 11; ++i) test[i] = 'a'; @@ -24,8 +40,9 @@ int main(void) test[i] = 'b'; } - // Same for pointer aliasing - // [!!] potential stack buffer overflow on variable '' (size 10) + // at line 49, column 16 + // [!!] potential stack buffer overflow on variable 'test' (size 10) + // alias path: test -> arraydecay -> ptr // index variable may go up to 19 (array last valid index: 9) // (this is a write access) for (int i = 0; i < 20; i++) { diff --git a/test/bound-storage/ranges_test.c b/test/bound-storage/ranges_test.c index b6dc757..cf9c5f1 100644 --- a/test/bound-storage/ranges_test.c +++ b/test/bound-storage/ranges_test.c @@ -18,6 +18,11 @@ void ub_overflow(int i) { char buf[10]; + // at line 27, column 16 + // [!!] potential stack buffer overflow on variable 'buf' (size 10) + // alias path: buf + // index variable may go up to 10 (array last valid index: 9) + // (this is a write access) if (i <= 10) buf[i] = 'B'; } @@ -31,6 +36,11 @@ void lb_negative(int i) { char buf[10]; + // at line 45, column 16 + // [!!] potential negative index on variable 'buf' (size 10) + // alias path: buf + // inferred lower bound for index expression: -3 (index may be < 0) + // (this is a write access) if (i >= -3 && i < 5) buf[i] = 'C'; } @@ -40,6 +50,17 @@ void lb_and_ub(int i) { char buf[10]; + // at line 65, column 16 + // [!!] potential stack buffer overflow on variable 'buf' (size 10) + // alias path: buf + // index variable may go up to 15 (array last valid index: 9) + // (this is a write access) + + // at line 65, column 16 + // [!!] 
potential negative index on variable 'buf' (size 10) + // alias path: buf + // inferred lower bound for index expression: -3 (index may be < 0) + // (this is a write access) if (i >= -3 && i <= 15) buf[i] = 'D'; } @@ -61,6 +82,11 @@ void nested_if_overflow(int i) { char buf[8]; + // at line 92, column 20 + // [!!] potential stack buffer overflow on variable 'buf' (size 8) + // alias path: buf + // index variable may go up to 10 (array last valid index: 7) + // (this is a write access) if (i <= 10) { if (i > 5) { buf[i] = 'E'; @@ -98,6 +124,11 @@ void loop_ub_overflow(void) { char buf[10]; + // at line 133, column 16 + // [!!] potential stack buffer overflow on variable 'buf' (size 10) + // alias path: buf + // index variable may go up to 10 (array last valid index: 9) + // (this is a write access) for (int i = 0; i <= 10; ++i) buf[i] = 'H'; } @@ -122,6 +153,12 @@ void unreachable_example(void) int i = 1; char buf[10]; + // at line 163, column 17 + // [!!] potential stack buffer overflow on variable 'buf' (size 10) + // alias path: buf + // constant index 11 is out of bounds (0..9) + // (this is a write access) + // [info] this access appears unreachable at runtime (condition is always false for this branch) if (i > 10) { // condition fausse à l’exécution buf[11] = 'J'; } @@ -137,6 +174,17 @@ void alias_lb_ub(int i) char buf[10]; char *p = buf; + // at line 189, column 14 + // [!!] potential stack buffer overflow on variable 'buf' (size 10) + // alias path: buf -> arraydecay -> p + // index variable may go up to 12 (array last valid index: 9) + // (this is a write access) + + // at line 189, column 14 + // [!!] potential negative index on variable 'buf' (size 10) + // alias path: p -> arraydecay -> buf + // inferred lower bound for index expression: -2 (index may be < 0) + // (this is a write access) if (i >= -2 && i <= 12) p[i] = 'K'; } @@ -173,6 +221,17 @@ void huge_range(int i) { char buf[10]; + // at line 236, column 16 + // [!!] 
potential stack buffer overflow on variable 'buf' (size 10) + // alias path: buf + // index variable may go up to 100 (array last valid index: 9) + // (this is a write access) + + // at line 236, column 16 + // [!!] potential negative index on variable 'buf' (size 10) + // alias path: buf + // inferred lower bound for index expression: -100 (index may be < 0) + // (this is a write access) if (i >= -100 && i <= 100) buf[i] = 'N'; } diff --git a/test/bound-storage/struct_array_overflow.c b/test/bound-storage/struct_array_overflow.c index 291da2d..03959d7 100644 --- a/test/bound-storage/struct_array_overflow.c +++ b/test/bound-storage/struct_array_overflow.c @@ -12,40 +12,40 @@ void ok_direct(void) s.buf[i] = 'A'; // OK } -// Function: overflow_eq_10 (line 19, column 18) -// [!!] potential stack buffer overflow on variable 's' (size 10) -// alias path: s -> buf -// index variable may go up to 10 (array last valid index: 9) -// (this is a write access) void overflow_eq_10(void) { struct S s; + // at line 24, column 18 + // [!!] potential stack buffer overflow on variable 's' (size 10) + // alias path: s -> buf + // index variable may go up to 10 (array last valid index: 9) + // (this is a write access) for (int i = 0; i <= 10; ++i) s.buf[i] = 'B'; // i == 10 -> overflow } -// Function: overflow_const_index (line 25, column 15) -// [!!] potential stack buffer overflow on variable 's' (size 10) -// alias path: s -> buf -// constant index 11 is out of bounds (0..9) -// (this is a write access) void overflow_const_index(void) { struct S s; + // at line 35, column 15 + // [!!] potential stack buffer overflow on variable 's' (size 10) + // alias path: s -> buf + // constant index 11 is out of bounds (0..9) + // (this is a write access) s.buf[11] = 'C'; // overflow constant } -// Function: nested_if_overflow (line 34, column 18) -// [!!] 
potential stack buffer overflow on variable 's' (size 10) -// alias path: s -> buf -// index variable may go up to 15 (array last valid index: 9) -// (this is a write access) void nested_if_overflow(void) { struct S s; int i = 15; - if (i > 5 && i <= 15) // ton analyse de bornes devrait voir UB = 15 + // at line 49, column 18 + // [!!] potential stack buffer overflow on variable 's' (size 10) + // alias path: s -> buf + // index variable may go up to 15 (array last valid index: 9) + // (this is a write access) + if (i > 5 && i <= 15) // UB = 15 s.buf[i] = 'D'; // overflow } diff --git a/test/escape-stack/direct-callback.c b/test/escape-stack/direct-callback.c index 01ed683..577805a 100644 --- a/test/escape-stack/direct-callback.c +++ b/test/escape-stack/direct-callback.c @@ -4,5 +4,8 @@ void sink(char *p); void pass_to_sink(void) { char buf[10]; + // at line 10, column 5 + // [!!] stack pointer escape: address of variable 'buf' escapes this function + // address passed as argument to function 'sink' (callee may capture the pointer beyond this function) sink(buf); // le callee peut capturer le pointeur } diff --git a/test/escape-stack/global-buf.c b/test/escape-stack/global-buf.c index 6e1e260..6c05cd3 100644 --- a/test/escape-stack/global-buf.c +++ b/test/escape-stack/global-buf.c @@ -4,6 +4,9 @@ static char *g; void set_global(void) { char buf[10]; + // at line 10, column 7 + // [!!] stack pointer escape: address of variable 'buf' escapes this function + // stored into global variable 'g' (pointer may be used after the function returns) g = buf; // warning attendu: store_global } diff --git a/test/escape-stack/global-struct.c b/test/escape-stack/global-struct.c new file mode 100644 index 0000000..d5eb36b --- /dev/null +++ b/test/escape-stack/global-struct.c @@ -0,0 +1,14 @@ +struct Holder { + char *p; +}; + +struct Holder G; + +void store_in_global_field(void) +{ + char buf[10]; + // at line 13, column 9 + // [!!] 
stack pointer escape: address of variable 'buf' escapes this function + // stored into global variable 'G' (pointer may be used after the function returns) + G.p = buf; // leak : G is global +} diff --git a/test/escape-stack/global_struct.c b/test/escape-stack/global_struct.c deleted file mode 100644 index 0584897..0000000 --- a/test/escape-stack/global_struct.c +++ /dev/null @@ -1,12 +0,0 @@ -// case_global_struct.c -struct Holder { - char *p; -}; - -struct Holder G; - -void store_in_global_field(void) -{ - char buf[10]; - G.p = buf; // fuite : G est global -} diff --git a/test/escape-stack/indirect-callback.c b/test/escape-stack/indirect-callback.c index dce17d8..cd7baf5 100644 --- a/test/escape-stack/indirect-callback.c +++ b/test/escape-stack/indirect-callback.c @@ -1,8 +1,10 @@ -// case_callback.c typedef void (*cb_t)(char *); void use_callback(cb_t cb) { char buf[10]; - cb(buf); // fuite potentielle via callback + // at line 9, column 5 + // [!!] stack pointer escape: address of variable 'buf' escapes this function + // address passed as argument to an indirect call (callback may capture the pointer beyond this function) + cb(buf); // potential leak by callback } diff --git a/test/escape-stack/out_param.c b/test/escape-stack/out_param.c index 76c9f87..5910181 100644 --- a/test/escape-stack/out_param.c +++ b/test/escape-stack/out_param.c @@ -1,11 +1,12 @@ -// case_out_param.c void leak_out_param(char **out) { char buf[10]; + // at line 7, column 10 + // [!!] stack pointer escape: address of variable 'buf' escapes this function + // stored through a non-local pointer (e.g. 
via an out-parameter; pointer may outlive this function) *out = buf; // fuite via paramètre de sortie } -// case_out_param_safe.c void safe_out_param(char **out) { char *local = 0; // pointeur, mais pas de stack buffer derrière diff --git a/test/escape-stack/return-buf.c b/test/escape-stack/return-buf.c index 8bf0ad1..bdfbc1d 100644 --- a/test/escape-stack/return-buf.c +++ b/test/escape-stack/return-buf.c @@ -1,7 +1,9 @@ -// tests/stack_escape_return.c char *ret_buf(void) { char buf[10]; + // at line 7, column 5 + // [!!] stack pointer escape: address of variable 'buf' escapes this function + // escape via return statement (pointer to stack returned to caller) return buf; // warning attendu: return } diff --git a/test/escape-stack/stack_escape.c b/test/escape-stack/stack_escape.c index d3da7b5..727807a 100644 --- a/test/escape-stack/stack_escape.c +++ b/test/escape-stack/stack_escape.c @@ -1,5 +1,5 @@ -// tests/stack_escape.c char *g_ptr; + struct Holder { char *p; }; diff --git a/test/recursion/c/infinite-recursion.c b/test/recursion/c/infinite-recursion.c index f5f098c..31eb777 100644 --- a/test/recursion/c/infinite-recursion.c +++ b/test/recursion/c/infinite-recursion.c @@ -1,5 +1,8 @@ #include +// [!] recursive or mutually recursive function detected +// [!!!] unconditional self recursion detected (no base case) +// this will eventually overflow the stack at runtime void tutu(void) { tutu(); diff --git a/test/recursion/c/limited-recursion.c b/test/recursion/c/limited-recursion.c index 28deaf1..ab1a458 100644 --- a/test/recursion/c/limited-recursion.c +++ b/test/recursion/c/limited-recursion.c @@ -1,5 +1,6 @@ #include +// [!] 
recursive or mutually recursive function detected void tutu(void) { static int counter = 0; diff --git a/test/vla/deguised-constant.c b/test/vla/deguised-constant.c index 029dde2..af51a48 100644 --- a/test/vla/deguised-constant.c +++ b/test/vla/deguised-constant.c @@ -1,7 +1,7 @@ void foo(void) { int n = 6; - char buf[n]; // techniquement VLA, mais bornée et triviale + char buf[n]; // techniquement VLA, mais bornée et triviale, patch car faux positif } int main(int ac, char **av) diff --git a/test/vla/vla-read.c b/test/vla/vla-read.c index ac1e815..3257323 100644 --- a/test/vla/vla-read.c +++ b/test/vla/vla-read.c @@ -5,12 +5,19 @@ int main(void) { char tmp[1024]; + // ----- at line 11, column 17 + // [!!] stack pointer escape: address of variable 'tmp' escapes this function + // address passed as argument to function '_read' (callee may capture the pointer beyond this function) ssize_t n = read(STDIN_FILENO, tmp, sizeof(tmp)); if (n <= 0) return 1; // char *buf = malloc(n); int len = (int)n; + // at line 21, column 5 + // [!] dynamic stack allocation detected for variable 'vla' + // allocated type: i8 + // size of this allocation is not compile-time constant (VLA / variable alloca) and may lead to unbounded stack usage char buf[len]; if (!buf) return 1; @@ -18,6 +25,9 @@ int main(void) for (ssize_t i = 0; i < n; ++i) buf[i] = tmp[i]; + // at line 31, column 5 + // [!!] stack pointer escape: address of variable 'vla' escapes this function + // address passed as argument to function 'free' (callee may capture the pointer beyond this function) free(buf); return 0; } diff --git a/test/vla/vla-scanf.c b/test/vla/vla-scanf.c index 6b9c7fb..55ffe9a 100644 --- a/test/vla/vla-scanf.c +++ b/test/vla/vla-scanf.c @@ -3,10 +3,17 @@ int main(void) { int n; + // at line 9, column 9 + // [!!] 
stack pointer escape: address of variable 'n' escapes this function + // address passed as argument to function 'scanf' (callee may capture the pointer beyond this function) if (scanf("%d", &n) != 1) return 1; - char buf[n]; // VLA aussi + // at line 16, column 5 + // [!] dynamic stack allocation detected for variable 'vla' + // allocated type: i8 + // size of this allocation is not compile-time constant (VLA / variable alloca) and may lead to unbounded stack usage + char buf[n]; // VLA too return 0; }