pebble/tools/binutils.py

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os.path
import re
import sh
import subprocess
import sys
import tempfile


NM_LINE_PATTERN = re.compile(r"""([0-9a-f]+)\s+ # address
                             ([0-9a-f]+)\s+ # size
                             ([dDbBtTrR])\s+ # section type
                             (\S+) # name
                             \s*((\S+)\:([0-9]+))?$ # filename + line
                             """, flags=re.VERBOSE)


class Symbol(object):
    def __init__(self, name, size):
        self.name = name
        self.size = size

    def __str__(self):
        return '<Symbol %s: %u>' % (self.name, self.size)


class FileInfo(object):
    def __init__(self, filename):
        self.filename = filename
        self.size = 0
        self.symbols = {}

    def add_entry(self, symbol_name, size):
        if symbol_name in self.symbols:
            return

        self.size += size
        self.symbols[symbol_name] = Symbol(symbol_name, size)

    def remove_entry(self, symbol_name):
        result = self.symbols.pop(symbol_name, None)
        if result is not None:
            self.size -= result.size
        return result

    def pprint(self, verbose):
        print '  %s: size %u' % (self.filename, self.size)
        if verbose:
            l = sorted(self.symbols.itervalues(), key=lambda x: -x.size)
            for s in l:
                print '    %6u %-36s' % (s.size, s.name)

    def __str__(self):
        return '<FileInfo %s: %u>' % (self.filename, self.size)


class SectionInfo(object):
    def __init__(self, name):
        self.name = name
        self.count = 0
        self.size = 0
        self.files = {}

    def add_entry(self, name, filename, size):
        self.count += 1
        self.size += size

        if filename not in self.files:
            self.files[filename] = FileInfo(filename)

        self.files[filename].add_entry(name, size)

    def remove_unknown_entry(self, name):
        if 'Unknown' not in self.files:
            return
        result = self.files['Unknown'].remove_entry(name)
        if result is not None:
            self.size -= result.size
        return result

    def get_files(self):
        return self.files.values()

    def pprint(self, summary, verbose):
        print '%s: count %u size %u' % (self.name, self.count, self.size)

        if not summary:
            l = self.files.values()
            l = sorted(l, key=lambda f: -f.size)
            for f in l:
                f.pprint(verbose)


def analyze_elf(elf_file_path, sections_letters, use_fast_nm):
    """ Analyzes the elf file, using binutils.
        section_letters -- string of letters representing the sections to
                           analyze, e.g. 'tbd' => text, bss and data.
        use_fast_nm -- If False, a slow lookup method is used to avoid a bug in
                    `nm`. If True, the faster `nm -S -l` is used.
        Returns a dictionary with SectionInfo objects for each section.
    """
    def make_sections_dict(sections_letters):
        sections = {}
        for s in sections_letters:
            if s == 'b':
                sections['b'] = SectionInfo('.bss')
            elif s == 'd':
                sections['d'] = SectionInfo('.data')
            elif s == 't':
                sections['t'] = SectionInfo('.text')
            else:
                raise Exception('Invalid section <%s>, must be a combination'
                                ' of [bdt] characters\n' % s)
        return sections
    sections = make_sections_dict(sections_letters)

    generator = nm_generator(elf_file_path, use_fast_nm)
    for (_, section, symbol_name, filename, line, size) in generator:
        if not filename:
            filename = 'Unknown'
        if section in sections:
            sections[section].add_entry(symbol_name, filename, size)

    return sections


def nm_generator(elf_path, use_fast_nm=True):
    if use_fast_nm:
        return _nm_generator_fast(elf_path)
    else:
        return _nm_generator_slow(elf_path)


def _get_symbols_table(f):
    # NOTE: nm crashes when we pass in the -l command line option. As a
    # workaround, we use readelf to get the symbol to address mappings and then
    # we use addr2line to get file/lines from the addresses.
    infile = sh.arm_none_eabi_readelf('-s', '-W', f)

    line_pattern = re.compile(r"""\s+([0-9]+\:)\s+  # number
                                  ([0-9a-f]+)\s+    # address
                                  ([0-9]+)\s+       # size
                                  (\S+)\s+          # type
                                  (\S+)\s+          # Bind
                                  (\S+)\s+          # Visibility
                                  (\S+)\s+          # Ndx
                                  (\S+)             # symbol name
                                  """, flags=re.VERBOSE)

    def create_addr2line_process():
        return subprocess.Popen(['arm-none-eabi-addr2line', '-e', f],
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    addr2line = create_addr2line_process()

    symbols = {}
    for line_num, line in enumerate(infile):
        if (line_num % 300) == 0:
            sys.stdout.write(".")
            sys.stdout.flush()

        match = line_pattern.match(line)

        if match is None:
            continue

        type = match.group(4)
        if type not in ['FUNC', 'OBJECT']:
            continue
        addr = match.group(2)
        symbol_name = match.group(8)

        success = False
        while not success:
            try:
                addr2line.stdin.write("0x%s\n" % addr)
                success = True
            except IOError:
                # This happens if the previous iteration caused an error
                addr2line = create_addr2line_process()

        src_file_line = addr2line.stdout.readline().strip()
        if src_file_line:
            # Some Bluetopia paths start with 'C:\...'
            components = src_file_line.split(':')
            src_file = ":".join(components[:-1])
            line = components[-1:][0]
        else:
            (src_file, line) = ('?', '0')
        symbols[symbol_name] = (src_file, line)

    addr2line.kill()

    print
    return symbols


# This method is quite slow, but works around a bug in nm.
def _nm_generator_slow(f):
    print "Getting list of symbols..."
    symbols = _get_symbols_table(f)
    print "Aggregating..."
    infile = sh.arm_none_eabi_nm('-S', f)

    line_pattern = re.compile(r"""([0-9a-f]+)\s+ # address
                                  ([0-9a-f]+)\s+ # size
                                  ([dDbBtTrR])\s+ # section type
                                  (\S+) # name
                                  """, flags=re.VERBOSE)

    for line in infile:
        match = line_pattern.match(line)

        if match is None:
            continue

        addr = int(match.group(1), 16)
        size = int(match.group(2), 16)
        section = match.group(3).lower()
        if section == 'r':
            section = 't'
        symbol_name = match.group(4)
        if symbol_name not in symbols:
            continue
        rel_file_path, line = symbols[symbol_name]
        if rel_file_path:
            rel_file_path = os.path.relpath(rel_file_path)

        yield (addr, section, symbol_name, rel_file_path, line, size)


# This method is much faster, and *should* work, but as of 2014-08-01, we get
# exceptions when we try to run nm -l on the tintin ELF file. So, the
# _nm_generator_slow() method above can be used as a workaround.
def _nm_generator_fast(f):
    """ Given a path to an .elf, generates tuples:
        (section, symbol_name, rel_file_path, line, size)
        Note, rel_file_path and line can be None.

    """
    infile = sh.arm_none_eabi_nm('-l', '-S', f)

    for line in infile:
        match = NM_LINE_PATTERN.match(line)

        if match is None:
            continue

        addr = int(match.group(1), 16)
        size = int(match.group(2), 16)

        section = match.group(3).lower()
        if section == 'r':
            section = 't'
        symbol_name = match.group(4)

        rel_file_path = match.group(6)
        if rel_file_path:
            rel_file_path = os.path.relpath(rel_file_path)

        line = match.group(7)
        if line:
            line = int(line)

        yield (addr, section, symbol_name, rel_file_path, line, size)


def size(elf_path):
    """ Returns size (text, data, bss)

    """
    output = subprocess.check_output(["arm-none-eabi-size", elf_path])

    lines = output.splitlines()
    if len(lines) < 2:
        return 0
    match = re.match("^\s*([0-9]+)\s+([0-9]+)\s+([0-9]+)", lines[1])
    if not match:
        return 0
    # text, data, bss
    return (int(match.groups()[0]),
            int(match.groups()[1]),
            int(match.groups()[2]))


def strip(elf_path):
    """ Strip debug info from specified .elf file
    """
    sh.arm_none_eabi_strip(elf_path)


def copy_elf_section(in_elf_path, out_elf_path, section_name_list):
    """ Creates out_elf_path containing only sections in 'section name list'
    """
    args = []
    for name in section_name_list:
        args.append('-j')
        args.append(name)
    args.append(in_elf_path)
    args.append(out_elf_path)
    sh.arm_none_eabi_objcopy(args)


def section_bytes(elf_path, section_name):
    """ Returns the bytes in a section of a given .elf file

    """
    with tempfile.NamedTemporaryFile() as temp:
        sh.arm_none_eabi_objcopy(['-j', section_name, '-O', 'binary',
                                  elf_path, temp.name])
        with open(temp.name) as f:
            return f.read()