compiler: intrinsify some math/bits functions

Let the Go frontend recognize some math/bits functions and turn
them into intrinsics.

Corresponding change in the GCC backend:

Index: gcc/go/go-gcc.cc
===================================================================
--- gcc/go/go-gcc.cc	(revision 272290)
+++ gcc/go/go-gcc.cc	(working copy)
@@ -613,7 +613,7 @@
 						NULL_TREE),
 		       false, false);

-  // Used by runtime/internal/sys.
+  // Used by runtime/internal/sys and math/bits.
   this->define_builtin(BUILT_IN_CTZ, "__builtin_ctz", "ctz",
 		       build_function_type_list(integer_type_node,
 						unsigned_type_node,
@@ -624,6 +624,31 @@
 						long_long_unsigned_type_node,
 						NULL_TREE),
 		       true, false);
+  this->define_builtin(BUILT_IN_CLZ, "__builtin_clz", "clz",
+		       build_function_type_list(integer_type_node,
+						unsigned_type_node,
+						NULL_TREE),
+		       true, false);
+  this->define_builtin(BUILT_IN_CLZLL, "__builtin_clzll", "clzll",
+		       build_function_type_list(integer_type_node,
+						long_long_unsigned_type_node,
+						NULL_TREE),
+		       true, false);
+  this->define_builtin(BUILT_IN_POPCOUNT, "__builtin_popcount", "popcount",
+		       build_function_type_list(integer_type_node,
+						unsigned_type_node,
+						NULL_TREE),
+		       true, false);
+  this->define_builtin(BUILT_IN_POPCOUNTLL, "__builtin_popcountll", "popcountll",
+		       build_function_type_list(integer_type_node,
+						long_long_unsigned_type_node,
+						NULL_TREE),
+		       true, false);
+  this->define_builtin(BUILT_IN_BSWAP16, "__builtin_bswap16", "bswap16",
+		       build_function_type_list(uint16_type_node,
+						uint16_type_node,
+						NULL_TREE),
+		       true, false);
   this->define_builtin(BUILT_IN_BSWAP32, "__builtin_bswap32", "bswap32",
 		       build_function_type_list(uint32_type_node,
 						uint32_type_node,

Change-Id: If44b18882c885f1c93d4682890f176b89cbc5b1a
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/183266
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/go/expressions.cc b/go/expressions.cc
index b9cf0f3..04aacf8 100644
--- a/go/expressions.cc
+++ b/go/expressions.cc
@@ -11063,6 +11063,25 @@
       package = "runtime/internal/atomic";
     }
 
+  if (package == "runtime/internal/sys")
+    {
+      // runtime/internal/sys functions and math/bits functions
+      // are very similar. In order not to duplicate code, we just
+      // redirect to the latter and let the code below handle them.
+      if (name == "Bswap32")
+        name = "ReverseBytes32";
+      else if (name == "Bswap64")
+        name = "ReverseBytes64";
+      else if (name == "Ctz32")
+        name = "TrailingZeros32";
+      else if (name == "Ctz64")
+        name = "TrailingZeros64";
+      else
+        return NULL;
+
+      package = "math/bits";
+    }
+
   if (package == "runtime")
     {
       // Handle a couple of special runtime functions.  In the runtime
@@ -11093,21 +11112,44 @@
           return Expression::make_cast(uintptr_type, call, loc);
         }
     }
-  else if (package == "runtime/internal/sys")
+  else if (package == "math/bits")
     {
-      if (name == "Bswap32"
+      if ((name == "ReverseBytes16" || name == "ReverseBytes32"
+           || name == "ReverseBytes64" || name == "ReverseBytes")
           && this->args_ != NULL && this->args_->size() == 1)
         {
+          Runtime::Function code;
+          if (name == "ReverseBytes16")
+            code = Runtime::BUILTIN_BSWAP16;
+          else if (name == "ReverseBytes32")
+            code = Runtime::BUILTIN_BSWAP32;
+          else if (name == "ReverseBytes64")
+            code = Runtime::BUILTIN_BSWAP64;
+          else if (name == "ReverseBytes")
+            code = (int_size == 8 ? Runtime::BUILTIN_BSWAP64 : Runtime::BUILTIN_BSWAP32);
+          else
+            go_unreachable();
           Expression* arg = this->args_->front();
-          return Runtime::make_call(Runtime::BUILTIN_BSWAP32, loc, 1, arg);
+          Expression* call = Runtime::make_call(code, loc, 1, arg);
+          if (name == "ReverseBytes")
+            return Expression::make_cast(uint_type, call, loc);
+          return call;
         }
-      else if (name == "Bswap64"
+      else if ((name == "TrailingZeros8" || name == "TrailingZeros16")
                && this->args_ != NULL && this->args_->size() == 1)
         {
+          // GCC does not have a ctz8 or ctz16 intrinsic. We do
+          // ctz32(0x100 | arg) or ctz32(0x10000 | arg).
           Expression* arg = this->args_->front();
-          return Runtime::make_call(Runtime::BUILTIN_BSWAP64, loc, 1, arg);
+          arg = Expression::make_cast(uint32_type, arg, loc);
+          unsigned long mask = (name == "TrailingZeros8" ? 0x100 : 0x10000);
+          Expression* c = Expression::make_integer_ul(mask, uint32_type, loc);
+          arg = Expression::make_binary(OPERATOR_OR, arg, c, loc);
+          Expression* call = Runtime::make_call(Runtime::BUILTIN_CTZ, loc, 1, arg);
+          return Expression::make_cast(int_type, call, loc);
         }
-      else if (name == "Ctz32"
+      else if ((name == "TrailingZeros32"
+                || (name == "TrailingZeros" && int_size == 4))
                && this->args_ != NULL && this->args_->size() == 1)
         {
           Expression* arg = this->args_->front();
@@ -11125,7 +11167,8 @@
           call = Expression::make_cast(int_type, call, loc);
           return Expression::make_conditional(cmp, c32, call, loc);
         }
-      else if (name == "Ctz64"
+      else if ((name == "TrailingZeros64"
+                || (name == "TrailingZeros" && int_size == 8))
                && this->args_ != NULL && this->args_->size() == 1)
         {
           Expression* arg = this->args_->front();
@@ -11143,6 +11186,99 @@
           call = Expression::make_cast(int_type, call, loc);
           return Expression::make_conditional(cmp, c64, call, loc);
         }
+      else if ((name == "LeadingZeros8" || name == "LeadingZeros16"
+                || name == "Len8" || name == "Len16")
+               && this->args_ != NULL && this->args_->size() == 1)
+        {
+          // GCC does not have a clz8 or clz16 intrinsic. We do
+          // clz32(arg<<24 | 0xffffff) or clz32(arg<<16 | 0xffff).
+          Expression* arg = this->args_->front();
+          arg = Expression::make_cast(uint32_type, arg, loc);
+          unsigned long shift =
+            ((name == "LeadingZeros8" || name == "Len8") ? 24 : 16);
+          Expression* c = Expression::make_integer_ul(shift, uint32_type, loc);
+          arg = Expression::make_binary(OPERATOR_LSHIFT, arg, c, loc);
+          unsigned long mask =
+            ((name == "LeadingZeros8" || name == "Len8") ? 0xffffff : 0xffff);
+          c = Expression::make_integer_ul(mask, uint32_type, loc);
+          arg = Expression::make_binary(OPERATOR_OR, arg, c, loc);
+          Expression* call = Runtime::make_call(Runtime::BUILTIN_CLZ, loc, 1, arg);
+          call = Expression::make_cast(int_type, call, loc);
+          // len = width - clz
+          if (name == "Len8")
+            {
+              c = Expression::make_integer_ul(8, int_type, loc);
+              return Expression::make_binary(OPERATOR_MINUS, c, call, loc);
+            }
+          else if (name == "Len16")
+            {
+              c = Expression::make_integer_ul(16, int_type, loc);
+              return Expression::make_binary(OPERATOR_MINUS, c, call, loc);
+            }
+          return call;
+        }
+      else if ((name == "LeadingZeros32" || name == "Len32"
+                || ((name == "LeadingZeros" || name == "Len") && int_size == 4))
+               && this->args_ != NULL && this->args_->size() == 1)
+        {
+          Expression* arg = this->args_->front();
+          if (!arg->is_variable())
+            {
+              Temporary_statement* ts = Statement::make_temporary(uint32_type, arg, loc);
+              inserter->insert(ts);
+              arg = Expression::make_temporary_reference(ts, loc);
+            }
+          // arg == 0 ? 32 : __builtin_clz(arg)
+          Expression* zero = Expression::make_integer_ul(0, uint32_type, loc);
+          Expression* cmp = Expression::make_binary(OPERATOR_EQEQ, arg, zero, loc);
+          Expression* c32 = Expression::make_integer_ul(32, int_type, loc);
+          Expression* call = Runtime::make_call(Runtime::BUILTIN_CLZ, loc, 1, arg->copy());
+          call = Expression::make_cast(int_type, call, loc);
+          Expression* cond = Expression::make_conditional(cmp, c32, call, loc);
+          // len = 32 - clz
+          if (name == "Len32" || name == "Len")
+            return Expression::make_binary(OPERATOR_MINUS, c32->copy(), cond, loc);
+          return cond;
+        }
+      else if ((name == "LeadingZeros64" || name == "Len64"
+                || ((name == "LeadingZeros" || name == "Len") && int_size == 8))
+               && this->args_ != NULL && this->args_->size() == 1)
+        {
+          Expression* arg = this->args_->front();
+          if (!arg->is_variable())
+            {
+              Temporary_statement* ts = Statement::make_temporary(uint64_type, arg, loc);
+              inserter->insert(ts);
+              arg = Expression::make_temporary_reference(ts, loc);
+            }
+          // arg == 0 ? 64 : __builtin_clzll(arg)
+          Expression* zero = Expression::make_integer_ul(0, uint64_type, loc);
+          Expression* cmp = Expression::make_binary(OPERATOR_EQEQ, arg, zero, loc);
+          Expression* c64 = Expression::make_integer_ul(64, int_type, loc);
+          Expression* call = Runtime::make_call(Runtime::BUILTIN_CLZLL, loc, 1, arg->copy());
+          call = Expression::make_cast(int_type, call, loc);
+          Expression* cond = Expression::make_conditional(cmp, c64, call, loc);
+          // len = 64 - clz
+          if (name == "Len64" || name == "Len")
+            return Expression::make_binary(OPERATOR_MINUS, c64->copy(), cond, loc);
+          return cond;
+        }
+      else if ((name == "OnesCount8" || name == "OnesCount16"
+           || name == "OnesCount32" || name == "OnesCount64"
+           || name == "OnesCount")
+          && this->args_ != NULL && this->args_->size() == 1)
+        {
+          Runtime::Function code;
+          if (name == "OnesCount64")
+            code = Runtime::BUILTIN_POPCOUNTLL;
+          else if (name == "OnesCount")
+            code = (int_size == 8 ? Runtime::BUILTIN_POPCOUNTLL : Runtime::BUILTIN_POPCOUNT);
+          else
+            code = Runtime::BUILTIN_POPCOUNT;
+          Expression* arg = this->args_->front();
+          Expression* call = Runtime::make_call(code, loc, 1, arg);
+          return Expression::make_cast(int_type, call, loc);
+        }
     }
   else if (package == "runtime/internal/atomic")
     {
diff --git a/go/runtime.cc b/go/runtime.cc
index 28aca44..e35658b 100644
--- a/go/runtime.cc
+++ b/go/runtime.cc
@@ -32,6 +32,8 @@
   RFT_INT,
   // Go type uint8, C type uint8_t.
   RFT_UINT8,
+  // Go type uint16, C type uint16_t.
+  RFT_UINT16,
   // Go type int32, C type int32_t.
   RFT_INT32,
   // Go type uint32, C type uint32_t.
@@ -115,6 +117,10 @@
 	  t = Type::lookup_integer_type("uint8");
 	  break;
 
+	case RFT_UINT16:
+	  t = Type::lookup_integer_type("uint16");
+	  break;
+
 	case RFT_INT32:
 	  t = Type::lookup_integer_type("int32");
 	  break;
@@ -257,6 +263,7 @@
     case RFT_BOOLPTR:
     case RFT_INT:
     case RFT_UINT8:
+    case RFT_UINT16:
     case RFT_INT32:
     case RFT_UINT32:
     case RFT_INT64:
diff --git a/go/runtime.def b/go/runtime.def
index a966cd4..27d5e47 100644
--- a/go/runtime.def
+++ b/go/runtime.def
@@ -428,6 +428,8 @@
                R1(POINTER))
 
 // Swap bytes.
+DEF_GO_RUNTIME(BUILTIN_BSWAP16, "__builtin_bswap16", P1(UINT16),
+               R1(UINT16))
 DEF_GO_RUNTIME(BUILTIN_BSWAP32, "__builtin_bswap32", P1(UINT32),
                R1(UINT32))
 DEF_GO_RUNTIME(BUILTIN_BSWAP64, "__builtin_bswap64", P1(UINT64),
@@ -437,6 +439,14 @@
 DEF_GO_RUNTIME(BUILTIN_CTZ, "__builtin_ctz", P1(UINT32), R1(INT32))
 DEF_GO_RUNTIME(BUILTIN_CTZLL, "__builtin_ctzll", P1(UINT64), R1(INT32))
 
+// Count leading zeros.
+DEF_GO_RUNTIME(BUILTIN_CLZ, "__builtin_clz", P1(UINT32), R1(INT32))
+DEF_GO_RUNTIME(BUILTIN_CLZLL, "__builtin_clzll", P1(UINT64), R1(INT32))
+
+// Count one bits.
+DEF_GO_RUNTIME(BUILTIN_POPCOUNT, "__builtin_popcount", P1(UINT32), R1(INT32))
+DEF_GO_RUNTIME(BUILTIN_POPCOUNTLL, "__builtin_popcountll", P1(UINT64), R1(INT32))
+
 // Atomics.
 DEF_GO_RUNTIME(ATOMIC_LOAD_4, "__atomic_load_4", P2(POINTER, INT32),
                R1(UINT32))