Quantcast
Channel: Andrew Kelley
Viewing all articles
Browse latest Browse all 61

Zig: Already More Knowable Than C

$
0
0

Zig: Already More Knowable Than C

2017 February 13

There is a nifty article created back in 2015 that made its way onto Hacker News today: I Do Not Know C.

The author creates a delightful set of source code samples in which the reader is intended to determine the correctness, and if correct, predict the output of the provided code. If you have not done the exercises, I encourage you to take a moment to do so.

What follows are some of the examples translated into Zig. The numbers correspond to the numbers from the original post linked above, but the C code is embedded here for comparison.

1. Declaring the same variable twice

int i;
int i = 10;

In Zig:

var i = undefined;
var i = 10;

Output:

./test.zig:2:1: error: redefinition of 'i'
var i = 10;
^
./test.zig:1:1: note: previous definition is here
var i = undefined;
^

2. Null pointer

extern void bar(void);
void foo(int *x) {
    int y = *x;  /* (1) */
    if (!x) {    /* (2) */
        return;  /* (3) */
    }
    bar();
}

When this example is translated to Zig, you have to explicitly decide if the pointer is nullable. For example, if you used a bare pointer in Zig:

extern fn bar();

export fn foo(x: &c_int) {
    var y = *x;  // (1)
    if (x == null) {     // (2)
        return;    // (3)
    }
    bar();
}

Then it wouldn't even compile:

./test.zig:7:11: error: operator not allowed for type '?&c_int'
    if (x == null) {     // (2)
          ^

I think this error message can be improved, but even so, it prevents a possible null-related bug here.

If the code author makes the pointer nullable, then the natural way to port the C code to Zig would be:

extern fn bar();

export fn foo(x: ?&c_int) {
    var y = x ?? return;  // (1), (2), (3)
    bar();
}

This compiles to:

0000000000000000 <foo>:
   0:	48 85 ff             	test   %rdi,%rdi
   3:	74 05                	je     a <foo+0xa>
   5:	e9 00 00 00 00       	jmpq   a <foo+0xa>
   a:	c3                   	retq   

This does a null check, returns if null, and otherwise calls bar.

Perhaps a more faithful way to port the C code to Zig would be this:

extern fn bar();

fn foo(x: ?&c_int) {
    var y = ??x;  // (1)
    if (x == null) {     // (2)
        return;    // (3)
    }
    bar();
}

pub fn main(args: [][]u8) -> %void {
    foo(null);
}

The ?? operator unwraps the nullable value. It asserts that the value is not null, and returns the value. If the value is null then the behavior is undefined, just like in C.

However, in Zig, undefined behavior causes a crash in debug mode:

$ ./test
attempt to unwrap null
(...work-in-progress stack unwinding code follows...)
4196244  -> ./test.zig
4196024  -> ./test.zig
4195449  -> ./test.zig
4195318  -> ./test.zig
4196205  -> ./test.zig
4195688  -> ./test.zig
4195469  -> ./test.zig
4195356  -> ./test.zig
Aborted

This is a half-finished traceback implementation (lots of TODO items to complete before the 0.1.0 milestone), but the point is that Zig detected the undefined behavior and aborted.

In release mode, this example invokes undefined behavior just like in C. To avoid this, programmers are expected to choose one of these options:

  • Test code sufficiently in debug mode to catch undefined behavior abuse.
  • Utilize the safe-release option which includes the runtime undefined behavior safety checks.

5. strlen

int my_strlen(const char *x) {
    int res = 0;
    while(*x) {
        res++;
        x++;
    }
    return res;
}

In Zig, pointers generally point to single objects, while slices are used to refer to ranges of memory. So in practice you wouldn't need a strlen function, you would use some_bytes.len. But we can port this code over anyway:

export fn my_strlen(x: &const u8) -> c_int {
    var res: c_int = 0;
    while (x[res] != 0) {
        res += 1;
    }
    return res;
}

Here we must use pointer indexing because Zig does not support direct pointer arithmetic.

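The compiler catches the underlying problem, which is that a signed c_int is being used as the length and index where a usize is required: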

./test.zig:3:14: error: expected type 'usize', found 'c_int'
    while (x[res] != 0) {
             ^

6. Print string of bytes backwards

#include <stdio.h>
#include <string.h>
int main() {
    const char *str = "hello";
    size_t length = strlen(str);
    size_t i;
    for(i = length - 1; i >= 0; i--) {
        putchar(str[i]);
    }
    putchar('\n');
    return 0;
}

Ported to Zig:

const c = @cImport({
    @cInclude("stdio.h");
    @cInclude("string.h");
});

export fn main(argc: c_int, argv: &&u8) -> c_int {
    const str = c"hello";
    const length: c.size_t = c.strlen(str);
    var i: c.size_t = length - 1;
    while (i >= 0; i -= 1) {
        c.putchar(str[i]);
    }
    c.putchar('\n');
    return 0;
}

It compiles fine but produces this output when run:

$ ./test
integer overflow
(...work-in-progress stack unwinding code follows...)
Aborted

8. Weirdo syntax

#include <stdio.h>
int main() {
    int array[] = { 0, 1, 2 };
    printf("%d %d %d\n", 10, (5, array[1, 2]), 10);
}

There is no way to express this code in Zig. Good riddance.

9. Unsigned overflow

unsigned int add(unsigned int a, unsigned int b) {
    return a + b;
}

In Zig:

const io = @import("std").io;

export fn add(a: c_uint, b: c_uint) -> c_uint {
    return a + b;
}

pub fn main(args: [][]u8) -> %void {
    %%io.stdout.printf("{}\n", add(@maxValue(c_uint), 1));
}

Output:

$ ./test 
integer overflow
(...work-in-progress stack unwinding code follows...)
4196260  -> ./test.zig
4196008  -> ./test.zig
4195433  -> ./test.zig
4195293  -> ./test.zig
4196199  -> ./test.zig
4195672  -> ./test.zig
4195453  -> ./test.zig
4195340  -> ./test.zig
Aborted

The + operator asserts that there will be no overflow. If you want two's complement wraparound behavior, that is possible with the +% operator instead:

export fn add(a: c_uint, b: c_uint) -> c_uint {
    return a +% b;
}

Now the output is:

$ ./test 
0

10. Signed overflow

int add(int a, int b) {
    return a + b;
}

In C, signed and unsigned integer overflow work differently. In Zig, they work the same. + asserts that no overflow occurs, and +% performs two's complement wraparound behavior.

11. Negation overflow

int neg(int a) {
    return -a;
}

By now you can probably predict how this works in Zig.

const io = @import("std").io;

export fn neg(a: c_int) -> c_int {
    return -a;
}

pub fn main(args: [][]u8) -> %void {
    %%io.stdout.printf("{}\n", neg(@minValue(c_int)));
}

Output:

$ ./test 
integer overflow
(...work-in-progress stack unwinding code follows...)
4196260  -> ./test.zig
4196008  -> ./test.zig
4195433  -> ./test.zig
4195289  -> ./test.zig
4196194  -> ./test.zig
4195672  -> ./test.zig
4195453  -> ./test.zig
4195340  -> ./test.zig
Aborted

The -% wraparound variant of the negation operator works here too:

export fn neg(a: c_int) -> c_int {
    return -%a;
}

Output:

$ ./test 
-2147483648

12. Division overflow

int div(int a, int b) {
    assert(b != 0);
    return a / b;
}

Different operation, same deal.

const io = @import("std").io;

fn div(a: i32, b: i32) -> i32 {
    return a / b;
}

pub fn main(args: [][]u8) -> %void {
    %%io.stdout.printf("{}\n", div(@minValue(i32), -1));
}

Output:

$ ./test 
integer overflow
(...work-in-progress stack unwinding code follows...)
4196196  -> ./test.zig
4195944  -> ./test.zig
4195369  -> ./test.zig
4196709  -> ./test.zig
4196135  -> ./test.zig
4195608  -> ./test.zig
4195389  -> ./test.zig
4195276  -> ./test.zig
Aborted

Notably, if you execute the division operation at compile time, the overflow becomes a compile error (same for the other operations):

    %%io.stdout.printf("{}\n", comptime div(@minValue(i32), -1));

Output:

./test.zig:4:14: error: operation caused overflow
    return a / b;
             ^
./test.zig:8:44: note: called from here
    %%io.stdout.printf("{}\n", comptime div(@minValue(i32), -1));
                                           ^

Conclusion

It's clear from these incomplete stack trace listings that Zig is still a work-in-progress. Although to be fair, a crash in C doesn't have an error message or even a half-finished stack trace - it will typically print Segmentation fault and nothing else. And all the gdb debugging you can do with C works with Zig too.

Zig currently lacks a language specification and any sort of coherent documentation. Even so, it is on track to become a language that will boot out C as the simple, straightforward way to write system code.

Zig

A system programming language which prioritizes optimality, safety, and readability. Donate $1/month


Viewing all articles
Browse latest Browse all 61

Trending Articles