#!/usr/bin/python3 # # Copyright 2025. Quantum Corporation. All Rights Reserved. # StorNext is either a trademark or registered trademark of # Quantum Corporation in the US and/or other countries. # # cvdb perf trace analysis script # Processes cvdb perf trace output files and analyzes them for # I/O latency, can take text to csv. # import sys import glob import argparse import re def die(header=None, content=""): if (header != None): sys.stderr.write(header) sys.stderr.write(content) sys.stderr.write('\n') exit(1) def sizeof_fmt(num, suffix='B'): """Human readable numbers""" for unit in [' ', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']: if abs(num) < 1024.0: return "%6.2f %s%s" % (num, unit, suffix) num /= 1024.0 return "%.2f%s%s" % (num, 'Yi', suffix) def record_io(sequence, pid, ts, ino, offset, len, speed=None): if not ino in sequence: sequence[ino] = [] old = None else: entry = sequence[ino][-1] old = entry['offset'] + entry['len'] if old: point = {'ts' : ts, 'pid' : pid, 'offset' : offset, 'len' : len, 'stride' : offset - old} else: point = {'ts' : ts, 'pid' : pid, 'offset' : offset, 'len' : len, 'stride' : 0} if speed: point['speed'] = speed sequence[ino].append(point) return len(sequence[ino]) def record_start_io(sequence, cvp, point): if not cvp in sequence: sequence[cvp] = [] old = None else: entry = sequence[cvp][-1] old = entry['offset'] + entry['len'] point['stride'] = point['offset'] - old sequence[cvp].append(point) return len(sequence[cvp]) - 1 # # cvdb perf trace file creqated on Windows/Linux: # <1:11086-dd:5.545419> so # cvdb perf trace file creqated on Mac: # <1661782656.290113030:0:624.eca14540-dd> so # def extract_ts(line): bracket = line.find("> ") components = line[1:bracket].split(':') if (components[0].find('.') >= 0): # mac pid_proc = components[2].split('-') ts = components[0] else: # non-mac pid_proc = components[1].split('-') ts = components[2] if len(pid_proc) == 2: pid = pid_proc[0] proc = pid_proc[1] else: pid = pid_proc[1] proc = pid_proc[2] return pid, ts, proc def filter_line(line, min_time, max_time, process): bracket = line.find("> ") if bracket != -1: components = line[1:bracket].split(':') if (components[0].find('.') >= 0): # mac proc = components[2].split('-')[1] ts = float(components[0]) else: # check windows vs. linux m = win_regex.match(components[1]) if (m != None): match_length = len(m.group(0)) proc = components[1][match_length:] else: proc = components[1].split('-')[1] ts = float(components[2]) if process and proc != process: return 1 if ts > min_time and ts < max_time: return 0 return 1 def record_latency(sequence, ts, latency, bytes, sg_and_unit): second = int(round(ts)) if second in sequence: if sg_and_unit in sequence[second]: sequence[second][sg_and_unit]['count'] += 1 if latency < sequence[second][sg_and_unit]['min']: sequence[second][sg_and_unit]['min'] = latency elif latency > sequence[second][sg_and_unit]['max']: sequence[second][sg_and_unit]['max'] = latency sequence[second][sg_and_unit]['tot'] += latency sequence[second][sg_and_unit]['bytes'] += bytes return else: sequence[second][sg_and_unit] = { 'min' : latency, 'max' : latency, 'tot' : latency, 'bytes' : bytes, 'count' : 1 } else: unit_dat = { 'min' : latency, 'max' : latency, 'tot' : latency, 'bytes' : bytes, 'count' : 1 } sequence[second] = dict() sequence[second][sg_and_unit] = unit_dat def extract_dev_op(line, idx, dev_seq): pid, ts, proc = extract_ts(line) entries = line[idx:].split() latency = int(entries[5][1:-3]) if (len(entries) > 15): sg_and_unit = entries[15] + '-' + entries[13] else: sg_and_unit = entries[13] bytes = int(entries[9], 16) record_latency(dev_seq, float(ts), latency, bytes, sg_and_unit) def extract_proxy_op(line, idx, dev_seq): pid, ts, proc = extract_ts(line) entries = line[idx:].split() latency = int(entries[4][1:-3]) if (len(entries) > 14): sg_and_unit = entries[14] + '-' + entries[12] else: sg_and_unit = entries[12] bytes = int(entries[8], 16) record_latency(dev_seq, float(ts), latency, bytes, sg_and_unit) def device_read(dev_seq, filename): try: f = open(filename) except: die(None, "Cannot open " + filename) with f as fp: for line in fp: idx = line.find("Device Read Completed") if idx != -1: extract_dev_op(line, idx, dev_seq) else: idx = line.find("Proxy") if idx != -1: idx = line.find("Read Completed") if idx != -1: extract_proxy_op(line, idx, dev_seq) def device_write(dev_seq, filename): try: f = open(filename) except: die("Cannot open " + filename) exit(1) with f as fp: for line in fp: idx = line.find("Device Write Completed") if idx != -1: extract_dev_op(line, idx, dev_seq) else: idx = line.find("Proxy") if idx != -1: idx = line.find("Write Completed") if idx != -1: extract_proxy_op(line, idx, dev_seq) def extract_cvp_op(line, idx, pidspace, sequence): pid, ts, proc = extract_ts(line) entries = line[idx:].split() offset = int(entries[4], 16) len = int(entries[6], 10) cvp = entries[2] # here we need to insert into the sequence for an inode and # get the index back. In the pidspace we place, cvp, and seq idx. begin = { 'proc' : proc, 'pid' : pid, 'ts' : ts, 'offset' : offset, 'len' : len} seq_idx = record_start_io(sequence, cvp, begin) pidspace[pid] = { 'cvp' : cvp, 'idx' : seq_idx, 'begin' : begin, 'proc' : proc} def extract_rwcvp_op(line, idx, writes, pidspace, sequence): pid, ts, proc = extract_ts(line) entries = line[idx:].split() rw = entries[5] if int(rw, 16) == 0: if writes: return else: if writes == 0: return offset = int(entries[7], 16) length = int(entries[9], 16) cvp = entries[3] if (len(entries) > 17): inode = entries[17] name = line[idx:].split('/')[1] if (name[-1] == '\n'): name = name[:-1] inode_name_map[inode] = name # here we need to insert into the sequence for an inode and # get the index back. In the pidspace we place, cvp, and seq idx. begin = { 'proc' : proc, 'pid' : pid, 'ts' : ts, 'offset' : offset, 'len' : length} seq_idx = record_start_io(sequence, cvp, begin) pidspace[pid] = { 'cvp' : cvp, 'idx' : seq_idx, 'begin' : begin, 'proc' : proc} def extract_vfs_op(line, idx, pidspace, sequence, ino_map, read_map): pid, ts, proc = extract_ts(line) entries = line[idx:].split() speed = int(entries[3]) len = int(entries[9], 16) ino = entries[11] mode = entries[2] if pid in pidspace: info = pidspace[pid] cvp = info['cvp'] seq_idx = info['idx'] if not cvp in ino_map: ino_map[cvp] = ino if not ino in read_map: read_map[ino] = dict() read_map[ino][mode] = len else: if mode in read_map[ino]: read_map[ino][mode] += len else: read_map[ino][mode] = len sequence[cvp][seq_idx]['len'] = len sequence[cvp][seq_idx]['end_ts'] = ts sequence[cvp][seq_idx]['speed'] = speed def process_file(sequence, dev_seq, ino_map, read_map, filename, start_ts, end_ts, process, reads, writes, pidspace): try: f = open(filename) except: die("Cannot open " + filename) with f as fp: for line in fp: if filter_line(line, start_ts, end_ts, process): continue if reads: idx = line.find("VFS Read") if idx != -1: extract_vfs_op(line, idx, pidspace, sequence, ino_map, read_map) continue idx = line.find("Device Read Completed:") if idx != -1: extract_dev_op(line, idx, dev_seq) continue else: idx = line.find("Proxy") if idx != -1: idx = line.find("Read Completed") if idx != -1: extract_proxy_op(line, idx, dev_seq) continue idx = line.find("Device Read Start:") if idx != -1: continue idx = line.find("PERF: rwcvp") if idx != -1: extract_rwcvp_op(line, idx, False, pidspace, sequence) # drop through because writes can use this line too if writes: idx = line.find("PERF: rwcvp") if idx != -1: extract_rwcvp_op(line, idx, True, pidspace, sequence) continue idx = line.find("VFS Write") if idx != -1: extract_vfs_op(line, idx, pidspace, sequence, ino_map, read_map) continue idx = line.find("Device Write Completed:") if idx != -1: extract_dev_op(line, idx, dev_seq) continue else: idx = line.find("Proxy") if idx != -1: idx = line.find("Write Completed") if idx != -1: extract_proxy_op(line, idx, dev_seq) continue idx = line.find("Device Write Start:") if idx != -1: continue def track_sequence(sequence, entry): end = entry['offset'] + entry['len'] if sequence == None: sequence = {'offset' : entry['offset'], 'len' : entry['len'], 'start' : entry['ts']} else: if sequence['len'] + sequence['offset'] == entry['offset']: sequence['len'] += entry['len'] else: sequence = {'offset' : entry['offset'], 'len' : entry['len'], 'start' : entry['ts']} sequence['end'] = entry['ts'] return sequence def report_seq(prefix, bytes, start, end, rate=False): elapsed = float(end) - float(start) if elapsed: if rate: data_rate = 'speed: ' + sizeof_fmt(bytes / elapsed, 'Bytes/sec') else: data_rate = '' print(prefix + '{0} in {1:9.6f} seconds {2}'.format(sizeof_fmt(bytes, 'Bytes'), elapsed, data_rate)) def filter_sequence(ino_seq, min_size): total = 0 for entry in ino_seq: total += entry['len'] if total > min_size: return 1 return 0 def print_sequence(ino_seq, ino, idx, read_map, do_details, do_csv, do_per_sec, do_seq): total = 0 entry = ino_seq[0] start_ts = entry['ts'] pid = entry['pid'] sequence = None biggest_seq = None if not do_csv: print () if ino in inode_name_map: print('inode: ' + str(ino) + ' pid: ' + str(pid) + ' filename: ' + inode_name_map[ino]) else: print('inode: ' + str(ino) + ' pid: ' + str(pid)) print () sec_len = 0 sec_ts = float(start_ts) for entry in ino_seq: elapsed_str = '' if 'stride' in entry: stride = entry['stride'] else: stride = 0 if 'end_ts' in entry: end_ts = float(entry['end_ts']) begin_ts = float(entry['ts']) elapsed = end_ts - begin_ts elapsed_us = int(elapsed * 1000000.0) elapsed_str = 'elapsed: {0:8} us '.format(elapsed_us) if do_per_sec: if end_ts - sec_ts <= 1.0: sec_len += entry['len'] sec_end = end_ts elif sec_len: prefix= 'ts: {0:12.6f} '.format(sec_ts) if begin_ts - sec_ts < 1.5: report_seq(prefix, sec_len, sec_ts, begin_ts, True) else: report_seq(prefix, sec_len, sec_ts, sec_end, True) sec_ts = begin_ts sec_end = end_ts sec_len = entry['len'] entry['elapsed'] = elapsed_str total += entry['len'] if do_csv: if 'end_ts' in entry: name = '' if ino in inode_name_map: name = inode_name_map[ino] print ('{0:12}, {1:12}, {2}, {3}, {4}, {5:12}, {6:8}, {7:8}'.format( entry['ts'], entry['end_ts'], ino, name, entry['pid'], entry['offset'], entry['len'], entry['elapsed'])) elif do_details: prefix= 'ts: {0:12} {1} pid-proc: {2:16} '.format(entry['ts'], entry['elapsed'], str(entry['pid']) + '-' + entry['proc']) if 'speed' in entry: print (prefix + 'offset: {0:12} len {1:8} speed {2:6} MB/s stride {3:12}'.format( entry['offset'], entry['len'], entry['speed'], stride)) else: print (prefix + 'offset: {0:12} len {1:8} stride {2:12}'.format( entry['offset'], entry['len'], stride)) if do_seq: sequence = track_sequence(sequence, entry) if biggest_seq == None: biggest_seq = sequence elif sequence['len'] > biggest_seq['len']: biggest_seq = sequence if do_per_sec and sec_len: prefix= 'ts: {0:12.6f} '.format(sec_ts) report_seq(prefix, sec_len, sec_ts, sec_end, True) if do_details or do_per_sec: if 'end_ts' in entry: end_ts = entry['end_ts'] else: end_ts = entry['ts'] print () report_seq('Total: ', total, start_ts, end_ts) for mode in read_map: print ('{0:9} {1:20}'.format(mode, sizeof_fmt(read_map[mode], 'Bytes'))) print () if do_seq: report_seq('Sequence: ', biggest_seq['len'], biggest_seq['start'], biggest_seq['end'], True) def print_device_io(dev_seq): print () print ('Device I/O per second') print ('Note multiple stripe groups could be combined here') print () for sec in sorted(dev_seq): record = dev_seq[sec] print ('Second ' + str(sec)) for sg_and_unit in record: unit_dat = record[sg_and_unit] avg = unit_dat['tot'] / unit_dat['count'] print( 'sg-unit {0:7} count: {1:6} min: {2:8}us max:' ' {3:8}us avg: {4:8.0f}us {5:>16}' .format(sg_and_unit, unit_dat['count'], unit_dat['min'], unit_dat['max'], avg, sizeof_fmt(unit_dat['bytes'], 'Bytes')) ) def usage(name=None): return ''' snseq -h, --help # show this help and exit snseq {read,write,rdwr} {--persec,--details,--csv} \\ [filter_options] file(s) # Process VFS records snseq devread file(s) # Process only device reads snseq devwrite file(s) # Process only device writes filter_options are: --seq # track largest sequential I/O sequence --start start_time # start time offset --end end_time # end time offset --proc process_name # filter by process name --minsize byte_count # filter out files smaller than byte_count examples: snseq read --details cvdbout* # Process VFS reads, detailed mode snseq write --persec cvdbout* # VFS writes using per-second mode snseq rdwr --csv cvdbout* # Read and writes, emit CSV snseq devwrite cvdbout* # Device writes ''' def file_arg_set(parser): option_group = parser.add_mutually_exclusive_group() option_group.add_argument('--persec', action='store_true', default=False, help='data rate per second') option_group.add_argument('--details', action='store_true', default=False, help='output detailed data') option_group.add_argument('--csv', action='store_true', default=False, help='output csv format data') parser.add_argument('--seq', action='store_true', default=False, help="track largest sequential I/O sequence") parser.add_argument('--dev', action='store_true', default=False, help='output device I/O summary per second') parser.add_argument('--start', metavar='time', type=float, default=0.0, help='start time offset') parser.add_argument('--end', metavar='time', type=float, default=9999999999.0, help='end time offset') parser.add_argument('--proc', metavar='process', default=None, help='filter by process name') parser.add_argument('--minsize', metavar='bytes', default=0, type=int, help='filter out files with bytes or less data') parser.add_argument('files', nargs='*', help='cvdb perf output to process') argparser = argparse.ArgumentParser(usage=usage(),add_help=False) subparsers = argparser.add_subparsers(metavar="", dest="subcommand") parseread = subparsers.add_parser('read') parsewrite = subparsers.add_parser('write') parserdwr = subparsers.add_parser('rdwr') parsedevrd = subparsers.add_parser('devread') parsedevwr = subparsers.add_parser('devwrite') file_arg_set(parseread) file_arg_set(parsewrite) file_arg_set(parserdwr) parsedevrd.add_argument('files', nargs='*', help='cvdb perf output to process') parsedevwr.add_argument('files', nargs='*', help='cvdb perf output to process') if (len(sys.argv) == 1 or sys.argv[1] == "-h" or sys.argv[1] == "--help"): die("usage:", usage()) # hack to avoid double usage output cnt = 0 for arg in sys.argv: if arg == "--details" or arg == "--csv" or arg == "--persec": cnt = cnt + 1 if (cnt > 1): print("usage:" + usage()) print(">>>> Only one of --details, --csv, or --persec may be supplied <<<<") exit(1) args = argparser.parse_args() if (len(args.files) == 0): print("usage:" + usage()) print(">>>> " + args.subcommand + ": No cvdb trace filenames supplied <<<<\n") exit(1) # # The following is needed because Windows typically does # not do file globbing in its shells. Doesn't seem to hurt # when running under a shell that does globbing. # list=[] for arg in sorted(args.files): globres = glob.glob(arg) if (len(globres) > 0): for file in glob.glob(arg): list.append(file) else: list.append(arg) seq = dict() dev_seq = dict() inode_map = dict() read_type_map = dict() inode_name_map = dict() do_read = False do_write = False if args.subcommand == 'read': do_read = True do_write = False elif args.subcommand == 'write': do_read = False do_write = True elif args.subcommand == 'rdwr': do_read = True do_write = True if cnt == 0 and (do_read or do_write): print("usage:" + usage()) print(">>>> When using \"" + args.subcommand + "\", one of --details, --csv, or --persec must be supplied <<<<") exit(1) win_regex = re.compile("[0-9]+-[0-9a-f]{12,}-") if do_read or do_write: start_time = args.start end_time = args.end pidspace = dict() for arg in sorted(args.files): process_file(seq, dev_seq, inode_map, read_type_map, arg, start_time, end_time, args.proc, do_read, do_write, pidspace) if args.csv == True: print ('timestamp, finished, inode, file, pid, offset, len, elapsed') idx = 0 # sort inodes by first access time seq = sorted(seq.items(), key=lambda v: v[0]) if (len(seq) == 0 and (args.dev == 0 or len(dev_seq) == 0)): print("No \"" + args.subcommand + "\" records found in trace files") exit(0) # output results for entry in seq: cvp = entry[0] ino_seq = entry[1] if cvp in inode_map: ino = inode_map[cvp] if filter_sequence(ino_seq, args.minsize): print_sequence(ino_seq, ino, idx, read_type_map[ino], args.details, args.csv, args.persec, args.seq) idx = idx + 1 if args.dev: print_device_io(dev_seq) elif args.subcommand == 'devread': for arg in sorted(args.files): device_read(dev_seq, arg) if (len(dev_seq) == 0): sys.stderr.write("No \"device read\" records found in trace files\n") exit(0) print_device_io(dev_seq) elif args.subcommand == 'devwrite': for arg in sorted(args.files): device_write(dev_seq, arg) if (len(dev_seq) == 0): sys.stderr.write("No \"device write\" records found in trace files\n") exit(0) print_device_io(dev_seq) exit(0)