使用 Zig 进行简单的日志分析

问题描述

受 https://benhoyt.com/writings/count-words/ 的启发,我尝试用多种语言重写一个内部使用的日志分析脚本(不过我不会像那篇文章那样深入!)。

在完成 Go 版本(我自己写的)和 Rust 版本(在 SO 的帮助下)之后,我目前卡在了 Zig 上。我大致看懂了 https://github.com/benhoyt/countwords/blob/master/simple.zig,但仍然很难照着这个思路翻译我的原始脚本……尤其是:使用以元组为键的哈希表,以及在解析和打印时处理月份名称……

Python 中的原始脚本:

import sys

# Month abbreviation -> 1-based month number, in calendar order.
months = dict(zip(
    ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
    range(1, 13),
))

# Reverse lookup (month number -> abbreviation), used when printing results.
months_r = dict((number, name) for name, number in months.items())

# Count of matching log lines per (month number, day) pair.
totals = {}

# Tally every stdin line that mentions both "redis" and "Partial", keyed by
# the (month number, day) taken from the line's first two fields.
for line in sys.stdin:
    if "redis" not in line or "Partial" not in line:
        continue
    month_name, day_text = line.split()[:2]
    stamp = (months[month_name], int(day_text))
    totals[stamp] = totals.get(stamp, 0) + 1

# Tuples sort by month number first, then by day, so output is chronological.
for stamp in sorted(totals):
    month_number, day = stamp
    print(months_r[month_number], day, totals[stamp])

有熟悉较新版本 Zig 的人能帮忙吗?

非常感谢!

解决方法

解决方案来自 Zig 论坛:

const std = @import("std");

/// Identifies one calendar day as a (month, day) pair.
const Key = struct {
    /// Month number (4-bit unsigned).
    month: u4,
    /// Day of the month (5-bit unsigned).
    day: u5,
};

/// Packs a `Key` into one `u64`: the month is shifted up by 32 bits and the
/// day fills the low bits. Distinct keys therefore give distinct values, and
/// comparing two results orders keys by month first, then by day.
fn keyHash(key: Key) u64 {
    const month_bits = @as(u64, key.month) << 32;
    const day_bits = @as(u64, key.day);
    return month_bits | day_bits;
}

/// Hash map from `Key` to an occurrence count, hashed with `keyHash` and
/// compared with the standard library's auto-generated equality for `Key`.
const Totals = std.HashMap(
    Key,
    usize,
    keyHash,
    std.hash_map.getAutoEqlFn(Key),
    std.hash_map.default_max_load_percentage,
);

/// A key together with its count, stored in a flat slice so results can be
/// sorted before printing.
const Item = struct {
    key: Key,
    count: usize,
};

/// "Less than" comparator for sorting `Item`s: orders by the packed value of
/// each item's key as produced by `keyHash` (month first, then day).
fn itemSort(context: void, lhs: Item, rhs: Item) bool {
    // The comparator takes a context argument; this one has no use for it.
    _ = context;
    const left_rank = keyHash(lhs.key);
    const right_rank = keyHash(rhs.key);
    return left_rank < right_rank;
}

// zig fmt: off
/// Compile-time lookup table: three-letter month abbreviation -> month number.
const months = std.ComptimeStringMap(u4, .{
    .{ "Jan", 1 },  .{ "Feb", 2 },  .{ "Mar", 3 },
    .{ "Apr", 4 },  .{ "May", 5 },  .{ "Jun", 6 },
    .{ "Jul", 7 },  .{ "Aug", 8 },  .{ "Sep", 9 },
    .{ "Oct", 10 }, .{ "Nov", 11 }, .{ "Dec", 12 },
});

/// Reverse lookup: month number -> abbreviation. Index 0 holds a placeholder
/// so that month numbers 1 through 12 can index the array directly.
const months_r = [_][]const u8{
    "(padding)",
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
};
// zig fmt: on

/// Reads log lines from stdin, counts those that contain both "redis" and
/// "Partial" per (month, day) taken from the line's first two
/// whitespace-separated fields, and prints one "<Mon> <day> <count>" line per
/// group to stdout, sorted with `itemSort`.
///
/// Assumes every matching line starts with a known month abbreviation
/// followed by a day number: the month lookup and both `it.next()` results
/// are unwrapped with `.?`. A day that does not parse as a `u5`, an I/O
/// failure, or an allocation failure is returned as an error.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer if (gpa.deinit()) std.log.err("memory leak detected", .{});
    const allocator = &gpa.allocator;

    var totals = Totals.init(allocator);
    defer totals.deinit();

    // Bind the buffered reader to a named variable (as done for stdout below)
    // so the reader handle refers to storage that lives for the whole loop.
    var stdin_buffered = std.io.bufferedReader(std.io.getStdIn().reader());
    const stdin = stdin_buffered.reader();

    // Each line is read into this fixed buffer, so lines must fit in it.
    var buf: [4096]u8 = undefined;
    while (try stdin.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        // Skip lines that lack either marker. Both searches must scan `line`.
        const has_redis = std.mem.indexOf(u8, line, "redis") != null;
        const has_partial = std.mem.indexOf(u8, line, "Partial") != null;
        if (!has_redis or !has_partial)
            continue;

        // First field is the month abbreviation, second is the day of month.
        var it = std.mem.tokenize(line, &std.ascii.spaces);
        const month = months.get(it.next().?).?;
        const day = try std.fmt.parseUnsigned(u5, it.next().?, 10);

        const res = try totals.getOrPut(.{ .month = month, .day = day });
        if (res.found_existing)
            res.entry.value += 1
        else
            res.entry.value = 1;
    }

    var stdout = std.io.bufferedWriter(std.io.getStdOut().writer());
    defer stdout.flush() catch std.log.err("stdout flushing failed", .{});
    const out = stdout.writer();

    // Copy the map entries into a flat slice so they can be sorted.
    var items = try allocator.alloc(Item, totals.count());
    defer allocator.free(items);

    {
        var it = totals.iterator();
        var i: usize = 0;
        while (it.next()) |kv| : (i += 1) {
            items[i] = .{ .key = kv.key, .count = kv.value };
        }
    }

    std.sort.sort(Item, items, {}, itemSort);

    for (items) |it| {
        try out.print("{s} {d} {d}\n", .{ months_r[it.key.month], it.key.day, it.count });
    }
}