-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathreduce.awk
More file actions
69 lines (64 loc) · 1.55 KB
/
Copy pathreduce.awk
File metadata and controls
69 lines (64 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#########################################
# Handy Reduce Framework for Awk
#
# users must:
# call setkey in BEGIN with a string of column numbers separated by FS
# these represent the key columns
# e.g. setkey("1\t2\t3")
# sets the key columns to 1,2,3
# assumes FS matches tab
# define startrun(key) function
# key param is the key for the run: an array holding the value of each
# key column, indexed in the same way as KEYCOLS
# called once per run on the first line of a run
# define reduce(key) function
# key param is the key for the run
# called once per line
# define endrun(key) function
# key param is the key for the run
# called once per run after the last line of a run
#
# reserves variables KEYCOLS and LASTKEY
# defines two functions
# setkey (described above)
# keymatches (internal function used to check if current line is part of
# current run)
# setkey(keystr): record which columns make up the run key.
#   keystr  column numbers joined by the current FS, e.g. "1\t2\t3" when
#           FS matches tab
#   n       local scratch variable; callers must not pass it
# Fills the global KEYCOLS array through split(), which separates on FS.
# If the string yields no parts, an error message is written to
# /dev/stderr and the program exits with status 1.
function setkey(keystr,    n) {
    # n is declared as an extra parameter so that it is local to this
    # function; otherwise it would overwrite a global n in the user's script.
    n = split(keystr, KEYCOLS);
    if (n < 1) {
        print "error splitting keystr \"" keystr "\" into parts" > "/dev/stderr";
        exit 1;
    }
}
# keymatches(): report whether the current record belongs to the current run.
#   i  local loop index; callers must not pass it
# Compares the field named by each KEYCOLS entry against the value saved
# under the same index in LASTKEY.
# Returns 1 when every key column matches, 0 as soon as one differs.
# With an empty KEYCOLS the loop body never runs and the result is 1.
function keymatches(    i) {
    # i is declared as an extra parameter so that it is local; this function
    # runs on every record after the first and would otherwise overwrite a
    # global i used by the caller's startrun/reduce/endrun code.
    for (i in KEYCOLS) {
        if ($KEYCOLS[i] != LASTKEY[i]) {
            return 0;
        }
    }
    return 1;
}
BEGIN {
# Intentionally empty: this rule performs no setup. Per the header above,
# the user's own BEGIN rule is where setkey must be called.
}
# Per-record driver: detects run boundaries and dispatches to the
# user-supplied startrun/reduce/endrun callbacks.
{
    # A new run begins on the very first record, or whenever the key
    # columns differ from the saved key. keymatches() is deliberately not
    # consulted on record 1 because no key has been saved yet.
    if (NR == 1 || !keymatches()) {
        # Close the previous run first; there is none before record 1.
        if (NR != 1) {
            endrun(LASTKEY);
        }
        # Remember this record's key column values as the current run key.
        for (i in KEYCOLS) {
            LASTKEY[i] = $KEYCOLS[i];
        }
        startrun(LASTKEY);
    }
    # Every record, whether it opens a run or continues one, is reduced.
    reduce(LASTKEY);
}
# After the last record: close the run that is still open.
END {
    # With no input at all, no run was ever started, so there is nothing
    # to end.
    if (NR >= 1) {
        endrun(LASTKEY);
    }
}