Cascading looks quite interesting. Here is a python program that does something similar to the Technical Overview seen main
in the python program.
#!/usr/bin/env python # encoding: utf-8 import sys def input(theFile, pipe): """ pushes a file a line at a time to a coroutine pipe """ for line in theFile: pipe.send(line) pipe.close() @coroutine def extract(expression, pipe, group = 0): """ extract the group from a regex """ import re r = re.compile(expression) while True: line = (yield) match = r.search(line) if match: pipe.send(match.group(0)) @coroutine def sort(pipe): """ sort the input on a pipe """ import heapq heap = [] try: while True: line = (yield) heapq.heappush(heap, line) except GeneratorExit: while heap: pipe.send(heapq.heappop(heap)) @coroutine def group(groupPipe, pipe): """ sends consectutive matching lines from pipe to groupPipe """ cur = None g = None while True: line = (yield) if cur is None: g = groupPipe(pipe) elif cur != line: g.close() g = groupPipe(pipe) g.send(line) cur = line @coroutine def uniq(pipe): """ implements uniq -c """ lines = 0 try: while True: line = (yield) lines += 1 except GeneratorExit: pipe.send('%s\t%s' % (lines, line)) @coroutine def output(theFile): while True: line = (yield) theFile.write(line + '\n') def main(): input(sys.stdin, extract( r'^([^ ]+)', sort( group( uniq, output(sys.stdout) ) ) ) ) if __name__ == '__main__': main()
You can achieve the same results with the unix command line:
cat access.log | cut -d ' ' -f 1 | sort | uniq -c