Coccinelle archive mirror
 help / color / mirror / Atom feed
From: Akos Pasztory <akos.pasztory@gmail.com>
To: cocci@inria.fr
Subject: [cocci] type of integer decimal constants
Date: Thu, 11 Apr 2024 12:54:31 +0300	[thread overview]
Message-ID: <CAJwHcF57ee4M6P-nzg9=1drDQbpic-4qqbpSCbL1FO7XXsetVw@mail.gmail.com> (raw)


[-- Attachment #1.1: Type: text/plain, Size: 1592 bytes --]

Hi,

I noticed an issue with how Coccinelle determines the types of (at least)
decimal integer literals.
It only happens if --int-bits and/or --long-bits is specified, since
otherwise everything is treated as "int".
The output below was truly puzzling initially:

$ { echo 'void f() {'; printf '%d;\n' $(seq 0 32); echo '}'; } > decimals.c
$ ./spatch.opt --long-bits 32 --int-bits 32 --type-c decimals.c
void f() {
0/*int*/;
1/*int*/;
2/*int*/;
3/*unsigned long*/;
4/*unsigned long*/;
5/*long long*/;
6/*long long*/;
7/*long long*/;
8/*long long*/;
9/*long long*/;
10/*int*/;
11/*int*/;
12/*int*/;
13/*int*/;
14/*int*/;
15/*int*/;
16/*int*/;
17/*int*/;
18/*int*/;
19/*int*/;
20/*int*/;
21/*int*/;
22/*unsigned long*/;
23/*unsigned long*/;
24/*unsigned long*/;
25/*unsigned long*/;
26/*unsigned long*/;
27/*unsigned long*/;
28/*unsigned long*/;
29/*unsigned long*/;
30/*unsigned long*/;
31/*unsigned long*/;
32/*unsigned long*/;
}

After some investigation, I think the problem was introduced in
3c5dc62bae3e38d77044423ecb3575f30ab230fa
when a bigint dependency was removed. The commit replaces integer
thresholds with strings,
however in is_long_dec it does the following kind of changes:

-      if Big_int.ge_big_int bn ulong_threshold
+      if s >= ulong_threshold

This seems to do a lexicographic comparison of the strings instead of a
numeric one, so that, for example, "3" >= "2147483648" holds.
One solution that came to my mind is to zero-pad the strings to the same
length; lexicographic comparison then agrees with numeric comparison.

I've attached a patch, but note that I'm not an OCaml programmer, so it may
not be idiomatic.
Feel free to solve it another way ;)

[-- Attachment #1.2: Type: text/html, Size: 2054 bytes --]

[-- Attachment #2: 0001-fix-type-deduction-of-decimal-constants.patch --]
[-- Type: text/x-patch, Size: 6629 bytes --]

From 0781a67831dae49ba5db1e5a1f0d227311e7e71b Mon Sep 17 00:00:00 2001
Message-Id: <0781a67831dae49ba5db1e5a1f0d227311e7e71b.1712828847.git.akos.pasztory@gmail.com>
From: Akos Pasztory <akos.pasztory@gmail.com>
Date: Thu, 11 Apr 2024 09:52:46 +0300
Subject: [PATCH] fix type deduction of decimal constants

In a previous commit 'is_long_dec' was changed to use lexicographical
comparison of strings, which gives incorrect results. This only
happens if the --int-bits or --long-bits option is given; otherwise
everything is treated as "int".

The result may still not be entirely correct: the standard says
unsuffixed decimal constants have one of the types "int", "long", or
"long long", but the code still assigns unsigned variants.
---
 parsing_c/lexer_c.mll          | 19 ++++++++++---
 tests/typeofconst_i32l64.c     | 49 ++++++++++++++++++++++++++++++++++
 tests/typeofconst_i32l64.cocci | 29 ++++++++++++++++++++
 tests/typeofconst_i32l64.res   | 49 ++++++++++++++++++++++++++++++++++
 tests/typeofconst_il32.c       | 49 ++++++++++++++++++++++++++++++++++
 tests/typeofconst_il32.cocci   | 29 ++++++++++++++++++++
 tests/typeofconst_il32.res     | 49 ++++++++++++++++++++++++++++++++++
 7 files changed, 269 insertions(+), 4 deletions(-)
 create mode 100644 tests/typeofconst_i32l64.c
 create mode 100644 tests/typeofconst_i32l64.cocci
 create mode 100644 tests/typeofconst_i32l64.res
 create mode 100644 tests/typeofconst_il32.c
 create mode 100644 tests/typeofconst_il32.cocci
 create mode 100644 tests/typeofconst_il32.res

diff --git a/parsing_c/lexer_c.mll b/parsing_c/lexer_c.mll
index 5379c841..a56c1bc7 100644
--- a/parsing_c/lexer_c.mll
+++ b/parsing_c/lexer_c.mll
@@ -281,20 +281,31 @@ let error_radix s =
 
 (* julia: functions for figuring out the type of integers *)
 
+
+(* Compare two numbers in string form by zero padding to the same
+   length. *)
+let ge_str a b =
+  let alen = String.length a in
+  let blen = String.length b in
+  let n = max alen blen in
+  let apad = (String.make (n - alen) '0') ^ a in
+  let bpad = (String.make (n - blen) '0') ^ b in
+  apad >= bpad
+
 let is_long_dec s int uint long ulong longlong ulonglong =
   match !Flag_parsing_c.int_thresholds with
     None -> int
   | Some (_,_,int_threshold, uint_threshold,long_threshold,ulong_threshold) ->
-      if s >= ulong_threshold
+      if ge_str s ulong_threshold
       then longlong
       else
-	if s >= long_threshold
+	if ge_str s long_threshold
 	then ulong
 	else
-	  if s >= uint_threshold
+	  if ge_str s uint_threshold
 	  then long
 	  else
-	    if s >= int_threshold
+	    if ge_str s int_threshold
 	    then uint
 	    else int
 
diff --git a/tests/typeofconst_i32l64.c b/tests/typeofconst_i32l64.c
new file mode 100644
index 00000000..1c584fea
--- /dev/null
+++ b/tests/typeofconst_i32l64.c
@@ -0,0 +1,49 @@
+void f() {
+0;
+1;
+2;
+3;
+4;
+5;
+6;
+7;
+8;
+9;
+10;
+11;
+12;
+13;
+14;
+15;
+16;
+17;
+18;
+19;
+20;
+21;
+22;
+23;
+24;
+25;
+26;
+27;
+28;
+29;
+30;
+31;
+32;
+
+2147483647; // 2**31 - 1
+2147483648; // 2**31
+2147483649; // 2**31 + 1
+
+4294967295; // 2**32 - 1
+4294967296; // 2**32
+4294967297; // 2**32 + 1
+
+9223372036854775807; // 2**63 - 1
+9223372036854775808; // 2**63
+9223372036854775809; // 2**63 +1
+
+18446744073709551615; // 2**64 - 1
+}
diff --git a/tests/typeofconst_i32l64.cocci b/tests/typeofconst_i32l64.cocci
new file mode 100644
index 00000000..3bc7b9fe
--- /dev/null
+++ b/tests/typeofconst_i32l64.cocci
@@ -0,0 +1,29 @@
+#spatch --int-bits 32 --long-bits 64
+@@
+int si;
+long sl;
+long long sll;
+unsigned int ui;
+unsigned long ul;
+unsigned long long ull;
+@@
+
+(
+- si
++ INT(si)
+|
+- sl
++ LONG(sl)
+|
+- sll
++ LONGLONG(sll)
+|
+- ui
++ UINT(ui)
+|
+- ul
++ ULONG(ul)
+|
+- ull
++ ULONGLONG(ull)
+)
diff --git a/tests/typeofconst_i32l64.res b/tests/typeofconst_i32l64.res
new file mode 100644
index 00000000..b65bec2b
--- /dev/null
+++ b/tests/typeofconst_i32l64.res
@@ -0,0 +1,49 @@
+void f() {
+INT(0);
+INT(1);
+INT(2);
+INT(3);
+INT(4);
+INT(5);
+INT(6);
+INT(7);
+INT(8);
+INT(9);
+INT(10);
+INT(11);
+INT(12);
+INT(13);
+INT(14);
+INT(15);
+INT(16);
+INT(17);
+INT(18);
+INT(19);
+INT(20);
+INT(21);
+INT(22);
+INT(23);
+INT(24);
+INT(25);
+INT(26);
+INT(27);
+INT(28);
+INT(29);
+INT(30);
+INT(31);
+INT(32);
+
+INT(2147483647); // 2**31 - 1
+LONG(2147483648); // 2**31
+LONG(2147483649); // 2**31 + 1
+
+LONG(4294967295); // 2**32 - 1
+LONG(4294967296); // 2**32
+LONG(4294967297); // 2**32 + 1
+
+LONG(9223372036854775807); // 2**63 - 1
+ULONG(9223372036854775808); // 2**63
+ULONG(9223372036854775809); // 2**63 +1
+
+ULONG(18446744073709551615); // 2**64 - 1
+}
diff --git a/tests/typeofconst_il32.c b/tests/typeofconst_il32.c
new file mode 100644
index 00000000..1c584fea
--- /dev/null
+++ b/tests/typeofconst_il32.c
@@ -0,0 +1,49 @@
+void f() {
+0;
+1;
+2;
+3;
+4;
+5;
+6;
+7;
+8;
+9;
+10;
+11;
+12;
+13;
+14;
+15;
+16;
+17;
+18;
+19;
+20;
+21;
+22;
+23;
+24;
+25;
+26;
+27;
+28;
+29;
+30;
+31;
+32;
+
+2147483647; // 2**31 - 1
+2147483648; // 2**31
+2147483649; // 2**31 + 1
+
+4294967295; // 2**32 - 1
+4294967296; // 2**32
+4294967297; // 2**32 + 1
+
+9223372036854775807; // 2**63 - 1
+9223372036854775808; // 2**63
+9223372036854775809; // 2**63 +1
+
+18446744073709551615; // 2**64 - 1
+}
diff --git a/tests/typeofconst_il32.cocci b/tests/typeofconst_il32.cocci
new file mode 100644
index 00000000..b47e4ae6
--- /dev/null
+++ b/tests/typeofconst_il32.cocci
@@ -0,0 +1,29 @@
+#spatch --int-bits 32 --long-bits 32
+@@
+int si;
+long sl;
+long long sll;
+unsigned int ui;
+unsigned long ul;
+unsigned long long ull;
+@@
+
+(
+- si
++ INT(si)
+|
+- sl
++ LONG(sl)
+|
+- sll
++ LONGLONG(sll)
+|
+- ui
++ UINT(ui)
+|
+- ul
++ ULONG(ul)
+|
+- ull
++ ULONGLONG(ull)
+)
diff --git a/tests/typeofconst_il32.res b/tests/typeofconst_il32.res
new file mode 100644
index 00000000..f74513cf
--- /dev/null
+++ b/tests/typeofconst_il32.res
@@ -0,0 +1,49 @@
+void f() {
+INT(0);
+INT(1);
+INT(2);
+INT(3);
+INT(4);
+INT(5);
+INT(6);
+INT(7);
+INT(8);
+INT(9);
+INT(10);
+INT(11);
+INT(12);
+INT(13);
+INT(14);
+INT(15);
+INT(16);
+INT(17);
+INT(18);
+INT(19);
+INT(20);
+INT(21);
+INT(22);
+INT(23);
+INT(24);
+INT(25);
+INT(26);
+INT(27);
+INT(28);
+INT(29);
+INT(30);
+INT(31);
+INT(32);
+
+INT(2147483647); // 2**31 - 1
+ULONG(2147483648); // 2**31
+ULONG(2147483649); // 2**31 + 1
+
+ULONG(4294967295); // 2**32 - 1
+LONGLONG(4294967296); // 2**32
+LONGLONG(4294967297); // 2**32 + 1
+
+LONGLONG(9223372036854775807); // 2**63 - 1
+LONGLONG(9223372036854775808); // 2**63
+LONGLONG(9223372036854775809); // 2**63 +1
+
+LONGLONG(18446744073709551615); // 2**64 - 1
+}
-- 
2.39.2


             reply	other threads:[~2024-04-11 17:46 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-11  9:54 Akos Pasztory [this message]
2024-04-11 17:52 ` [cocci] type of integer decimal constants Julia Lawall
2024-04-11 18:44   ` Akos Pasztory
2024-04-11 21:25     ` Julia Lawall

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAJwHcF57ee4M6P-nzg9=1drDQbpic-4qqbpSCbL1FO7XXsetVw@mail.gmail.com' \
    --to=akos.pasztory@gmail.com \
    --cc=cocci@inria.fr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).