#!/usr/bin/perl -w
# gendump : Generates an assembly dump of a given executable file.
# Copyright (C) 2002 Dion Mendel
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA  02111-1307, USA.

# gendump generates an initial assembly dump of an executable file.
# Indexed jumps found in the disassembly are translated (e.g. switch
# statements).
# All non assembly lines are commented with either '#' or ';', or are blank.
# All other lines are assumed to contain assembly code.
# All decomp filters will apply to this generated file.
#
# Sample usage:
#    % gendump the-binary > dump

use Getopt::Long "GetOptions";
use FileHandle;
use strict;
use warnings;

our ($VERSION, $Verbose, $Raw, $OBJDUMP);

# full path to objdump program
$OBJDUMP = "/usr/bin/objdump";

$VERSION = "1.1";     # version of this program
$Verbose = 1;         # when true, objdump failures are reported on stderr
$Raw     = 0;         # when true, raw opcode bytes are included in the dump

parse_command_line_for_options();
usage() if (scalar @ARGV != 1);

my $filename = $ARGV[0];

# get list of jump tables, sorted by the offset at which their opcodes end
my @jumps = sort { $a->{end_opcodes} <=> $b->{end_opcodes} }
            get_indexed_jumps($filename);

# dump opcode data interspersed with jump indexes
my $next_start = 0;
foreach my $jump (@jumps) {
    my $start = $next_start;
    my $end   = $jump->{end_opcodes};

    # disassembly resumes immediately after the table (4 bytes per entry)
    $next_start = $jump->{start_indexes} + 4 * $jump->{num_indexes};

    # NOTE: "dump" is also a Perl builtin (CORE::dump), so the sub defined
    # in this file must be called with the "&" sigil.
    &dump($filename, $start, $end);
    dump_indexes($filename, $jump->{num_indexes}, $jump->{start_indexes});
}

# dump last
&dump($filename, $next_start, 0xffffffff);

exit(0);

############################### Objdump Helpers ###############################

# -----------------------------------------------------------------------------
# Starts $OBJDUMP with the given arguments and returns a handle to read its
# standard output from.  The command is run without a shell, so file names
# containing spaces or shell metacharacters are passed through untouched.
#  Params: $hide_stderr  true to discard anything objdump writes to stderr
#          @args         arguments to pass to objdump
# Returns: file handle for reading objdump's output
#  Throws: dies if objdump cannot be started
sub open_objdump {
    my ($hide_stderr, @args) = @_;
    my ($saved_stderr, $fh);

    if ($hide_stderr) {
        # The child inherits our file descriptor 2, so point it at /dev/null
        # for the duration of the open and put it back afterwards.
        open($saved_stderr, '>&', \*STDERR)
            or die "couldn't save stderr: $!";
        open(STDERR, '>', '/dev/null')
            or die "couldn't silence stderr: $!";
    }

    my $ok         = open($fh, '-|', $OBJDUMP, @args);
    my $open_error = $!;

    if ($saved_stderr) {
        open(STDERR, '>&', $saved_stderr)
            or die "couldn't restore stderr: $!";
        close($saved_stderr);
    }

    die "couldn't run objdump: $open_error" unless $ok;
    return $fh;
}

# -----------------------------------------------------------------------------
# Closes a handle returned by open_objdump.  A failing objdump is reported on
# stderr (unless --quiet was given) but is not fatal.
#  Params: $fh  file handle returned by open_objdump
# Returns: true if objdump exited successfully, false otherwise
sub close_objdump {
    my ($fh) = @_;

    return 1 if close($fh);

    if ($Verbose) {
        if ($!) {
            warn "error reading from objdump: $!\n";
        }
        else {
            warn "objdump exited with status " . ($? >> 8) . "\n";
        }
    }
    return 0;
}

############################### Output Functions ##############################

# -----------------------------------------------------------------------------
# Prints the jump string for the given index.
#  Params: $fh     file handle to output to
#          $index  index number
#          $str    hex string in little endian order (8 hex digits)
# Returns: None
sub print_index {
    my ($fh, $index, $str) = @_;

    unless ($str =~ /^([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})/i) {
        warn "skipping malformed jump table entry `${str}'\n" if $Verbose;
        return;
    }

    # reverse the byte order: "b88e0408" is the address 0x08048eb8
    printf {$fh} "# Indexed jump %02x %08x\n", $index, hex("$4$3$2$1");
    return;
}

# -----------------------------------------------------------------------------
# Outputs jump index data from the given filename.
#  Params: $filename name of file to disassemble
#          $num      number of jump indexes
#          $start    starting offset
# Returns: None
sub dump_indexes {
    my ($filename, $num, $start) = @_;

    my $end = $start + 4 * $num;

    # perform a hex dump (objdump -s) of the table
    my $fh = open_objdump(0,
                          "--start-address=$start", "--stop-address=$end",
                          "-s", "-j", ".text", "--", $filename);

    my $num_so_far = 0;
    my $count      = 0;
    while (defined(my $line = <$fh>)) {
        chomp($line);

        # skip the first 4 lines of output (assumed to be objdump's header)
        if ($count < 4) {
            $count += 1;
            next;
        }

        # Each data line is an address field followed by up to four 8 digit
        # words, each 9 columns apart.  Locate the first word from the width
        # of the address field rather than assuming a fixed column.
        next unless $line =~ /^(\s*[0-9a-f]+\s)/;
        my $first_column = length($1);

        foreach my $word_number (0 .. 3) {
            last if $num_so_far >= $num;

            my $column = $first_column + 9 * $word_number;
            last if length($line) < $column + 8;

            my $word = substr($line, $column, 8);
            last unless $word =~ /^[0-9a-f]{8}$/;

            print_index(\*STDOUT, $num_so_far, $word);
            $num_so_far++;
        }
    }
    close_objdump($fh);
    return;
}

# -----------------------------------------------------------------------------
# Outputs opcode data from the given filename.
#  Params: $filename name of file to disassemble
#          $start    starting offset
#          $end      ending offset
# Returns: None
sub dump {
    my ($filename, $start, $end) = @_;

    # perform disassembly
    my $fh = open_objdump(1,
                          "--start-address=$start", "--stop-address=$end",
                          "--show-raw-insn", "-d", "-j", ".text",
                          "--", $filename);

    my $count = 0;

    # cleanup each line before outputting
    while (defined(my $line = <$fh>)) {
        chomp($line);

        # skip the first 6 lines of output (assumed to be objdump's header)
        if ($count < 6) {
            $count += 1;
            next;
        }

        if (index($line, "\t") != -1) {
            # all assembly lines include tab characters:
            #   offset <TAB> raw bytes <TAB> instruction
            my (undef, $data, $opcodes) = split(/\t/, $line);
            my $offset = line_offset($line);

            # ensure consistent output for jumps and calls
            if (defined $opcodes) {
                # all calls have operand as 8digit hex value preceded by 0x
                $opcodes =~ s/(call\s+)(0x)?([0-9a-f]+)/
                              sprintf("%s0x%08x", $1, hex($3))/eg;

                # all jumps have operand as 8digit hex value preceded by 0x
                $opcodes =~ s/(j[a-z]+\s+)(0x)?([0-9a-f]+)/
                              sprintf("%s0x%08x", $1, hex($3))/eg;
            }
            else {
                # no instruction text on this line
                $opcodes = "";
            }

            # determine the line to output
            if ($Raw) {
                # compact data to save screen space
                $data = "" unless defined $data;
                $data =~ s/ //g;
                $line = sprintf("%08x: %-15s%s", $offset, $data, $opcodes);
            }
            else {
                # skip lines with no opcode data
                next if $opcodes eq "";
                $line = sprintf("%08x: %s", $offset, $opcodes);
            }
        }
        elsif (length($line) != 0) {
            # comment non blank lines
            $line = "# " . $line;
        }

        print STDOUT $line, "\n";
    }
    close_objdump($fh);
    return;
}

# -----------------------------------------------------------------------------
# Returns the offset found at the start of a given line.
#  Params: $line - line from objdump output
# Returns: offset found at the start of the line, or 0 if the line does not
#          start with a (lower case) hex number
sub line_offset {
    my ($line) = @_;

    # the offset is the first hex field, optionally preceded by padding
    return hex($1) if $line =~ /^\s*([0-9a-f]+)/;
    return 0;
}

# -----------------------------------------------------------------------------
# Given the filename of an executable, disassemble it and attempt to
# find the position of jump indexes (switch statements).
#  Params: $filename - name of executable file
# Returns: list of hash objects containing 'end_opcodes', 'num_indexes' and
#          'start_indexes'
#  Throws: dies if an indexed jmp is found that is not preceded by the
#          expected cmp / ja pair
sub get_indexed_jumps {
    my ($filename) = @_;
    my @jumps = ();

    # perform disassembly
    my $fh = open_objdump(1, "-d", "-j", ".text", "--", $filename);

    # The five most recently read lines, newest first.  $recent[0] is the
    # line just read, $recent[1] the one before it, and so on.
    my @recent = ("") x 5;

    while (defined(my $line = <$fh>)) {
        chomp($line);
        unshift @recent, $line;
        pop @recent;

        # Search for something like the following:
        #      cmp    $0xb,%eax
        #      ja     0x8048eb8
        #      jmp    *0x804832c(,%eax,4)
        # Which specifies a jump table of 12 elements (indexes 0 to 0xb)
        # starting at 0x804832c.
        #
        # The jmp is looked for on the PREVIOUS line, so that the line just
        # read supplies the offset at which the opcodes end.
        next unless $recent[1] =~ /jmp\s+\*(0x)?([0-9a-f]+)\(,(%...),4\)/;
        my $jmp_indexes = hex($2);
        my $reg         = $3;

        my ($ja_line, $cmp_line);
        if ($recent[2] =~ /mov\s+(-?(?:0x)?[0-9a-f]+\(%ebp\)),\Q$reg\E/) {
            # The index register is loaded from a stack slot just before the
            # jmp, so the cmp is against that slot and the cmp / ja pair sits
            # one line further back.
            $reg      = $1;
            $ja_line  = $recent[3];
            $cmp_line = $recent[4];
        }
        else {
            $ja_line  = $recent[2];
            $cmp_line = $recent[3];
        }

        $ja_line =~ /ja\s+(0x)?[0-9a-f]+/
            or die "ja expected `${ja_line}'";

        # $reg may contain regex metacharacters such as "(" so quote it
        $cmp_line =~ /cmpl?\s+\$(0x)?([0-9a-f]+),\Q$reg\E/
            or die "cmp expected `${cmp_line}'";

        # cmp holds the highest valid index, hence one more entry than that
        my $num_indexes = hex($2) + 1;
        my $end_opcodes = line_offset($line);

        push @jumps, { end_opcodes   => $end_opcodes,
                       num_indexes   => $num_indexes,
                       start_indexes => $jmp_indexes };
    }
    close_objdump($fh);

    return @jumps;
}

############################### Usage Functions ###############################

# -----------------------------------------------------------------------------
# Parses the command line for any specified options.  Sets the appropriate
# option flags if options are specified.  Prints usage info if invalid options
# are given.
# Returns: nothing
sub parse_command_line_for_options {
    my $want_raw     = 0;
    my $want_quiet   = 0;
    my $want_version = 0;
    my $want_help    = 0;

    # GetOptions returns false (after printing a diagnostic) when it meets an
    # option it does not know.
    GetOptions("q|quiet"   => \$want_quiet,
               "r|raw"     => \$want_raw,
               "V|version" => \$want_version,
               "h|help"    => \$want_help,
              ) or usage();

    if ($want_version) {
        print "$0 $VERSION\n";
        exit 0;
    }

    # asking for help is not an error, so exit successfully
    usage(0) if $want_help;

    $Raw     = 1 if $want_raw;
    $Verbose = !$want_quiet;
    return;
}

# -----------------------------------------------------------------------------
# Prints a nice usage message to stdout, and then exits.
#  Params: $status  optional exit status, defaults to 1
sub usage {
    my ($status) = @_;
    $status = 1 unless defined $status;

    print <<"_END";
$0 v${VERSION}
A program to generate an initial assembly dump of a binary file.  The
generated file is used as a basis to apply the various decomp filters.

Usage: $0 [options] file_name
    file_name is the name of the executable file to process.

Options:
    -q, --quiet       do not report objdump failures
    -r, --raw         include raw opcodes in output dump
    -V, --version     outputs version information and exits
    -h, --help        displays this help and exits
_END

    exit $status;
}