From 405801d8a8be734425eca4f3eebc56287804ac93 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 5 Feb 2023 10:04:34 +0100 Subject: [PATCH] macho: temp fix alignment and enable some logs --- src/link/MachO/Object.zig | 80 ++++++++++++++++++++++++++------------ src/link/MachO/ZldAtom.zig | 29 +++++++------- src/link/MachO/zld.zig | 22 +++++------ 3 files changed, 79 insertions(+), 52 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 401184da515..05638c1f858 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -54,12 +54,18 @@ atom_by_index_table: []AtomIndex = undefined, /// Can be undefined as set together with in_symtab. globals_lookup: []i64 = undefined, +/// All relocs sorted and flattened. +relocs: std.ArrayListUnmanaged(macho.relocation_info) = .{}, +sect_relocs_lookup: std.ArrayListUnmanaged(u32) = .{}, + atoms: std.ArrayListUnmanaged(AtomIndex) = .{}, pub fn deinit(self: *Object, gpa: Allocator) void { self.atoms.deinit(gpa); gpa.free(self.name); gpa.free(self.contents); + self.relocs.deinit(gpa); + self.sect_relocs_lookup.deinit(gpa); if (self.in_symtab) |_| { gpa.free(self.source_symtab_lookup); gpa.free(self.source_address_lookup); @@ -101,6 +107,10 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) return error.MismatchedCpuArchitecture; } + const nsects = self.getSourceSections().len; + try self.sect_relocs_lookup.resize(allocator, nsects); + mem.set(u32, self.sect_relocs_lookup.items, 0); + var it = LoadCommandIterator{ .ncmds = self.header.ncmds, .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], @@ -110,13 +120,11 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) .SYMTAB => { const symtab = cmd.cast(macho.symtab_command).?; self.in_symtab = @ptrCast( - [*]const macho.nlist_64, - @alignCast(@alignOf(macho.nlist_64), &self.contents[symtab.symoff]), + [*]align(1) const macho.nlist_64, + self.contents.ptr + symtab.symoff, )[0..symtab.nsyms]; self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; - const nsects = self.getSourceSections().len; - self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects); self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); @@ -192,6 +200,17 @@ const SymbolAtIndex = struct { return mem.sliceTo(@ptrCast([*:0]const u8, ctx.in_strtab.?.ptr + off), 0); } + fn getSymbolSeniority(self: SymbolAtIndex, ctx: Context) u2 { + const sym = self.getSymbol(ctx); + if (!sym.ext()) { + const sym_name = self.getSymbolName(ctx); + if (mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L")) return 0; + return 1; + } + if (sym.weakDef() or sym.pext()) return 2; + return 3; + } + /// Performs lexicographic-like check. /// * lhs and rhs defined /// * if lhs == rhs @@ -206,23 +225,15 @@ const SymbolAtIndex = struct { if (lhs.sect() and rhs.sect()) { if (lhs.n_value == rhs.n_value) { if (lhs.n_sect == rhs.n_sect) { - if (lhs.ext() and rhs.ext()) { - if ((lhs.pext() or lhs.weakDef()) and (rhs.pext() or rhs.weakDef())) { - return false; - } else return rhs.pext() or rhs.weakDef(); - } else { - const lhs_name = lhs_index.getSymbolName(ctx); - const lhs_temp = mem.startsWith(u8, lhs_name, "l") or mem.startsWith(u8, lhs_name, "L"); - const rhs_name = rhs_index.getSymbolName(ctx); - const rhs_temp = mem.startsWith(u8, rhs_name, "l") or mem.startsWith(u8, rhs_name, "L"); - if (lhs_temp and rhs_temp) { - return false; - } else return rhs_temp; - } + const lhs_senior = lhs_index.getSymbolSeniority(ctx); + const rhs_senior = rhs_index.getSymbolSeniority(ctx); + if (lhs_senior == rhs_senior) { + return lessThanByNStrx(ctx, lhs_index, rhs_index); + } else return lhs_senior < rhs_senior; } else return lhs.n_sect < rhs.n_sect; } else return lhs.n_value < rhs.n_value; } else if (lhs.undf() and rhs.undf()) { - return false; + return lessThanByNStrx(ctx, lhs_index, rhs_index); } else return rhs.undf(); } @@ -393,6 +404,16 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void { zld.sections.items(.header)[out_sect_id].sectName(), }); + // Parse all relocs for the input section, and sort in descending order. + // Previously, I have wrongly assumed the compilers output relocations for each + // section in a sorted manner which is simply not true. + const start = @intCast(u32, self.relocs.items.len); + if (self.getSourceRelocs(section.header)) |relocs| { + try self.relocs.appendUnalignedSlice(gpa, relocs); + std.sort.sort(macho.relocation_info, self.relocs.items[start..], {}, relocGreaterThan); + } + self.sect_relocs_lookup.items[section.id] = start; + const cpu_arch = zld.options.target.cpu.arch; const sect_loc = filterSymbolsBySection(symtab[sect_sym_index..], sect_id + 1); const sect_start_index = sect_sym_index + sect_loc.index; @@ -559,7 +580,7 @@ pub fn getSourceSections(self: Object) []const macho.section_64 { } else unreachable; } -pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { +pub fn parseDataInCode(self: Object) ?[]align(1) const macho.data_in_code_entry { var it = LoadCommandIterator{ .ncmds = self.header.ncmds, .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], @@ -569,10 +590,7 @@ pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { .DATA_IN_CODE => { const dice = cmd.cast(macho.linkedit_data_command).?; const ndice = @divExact(dice.datasize, @sizeOf(macho.data_in_code_entry)); - return @ptrCast( - [*]const macho.data_in_code_entry, - @alignCast(@alignOf(macho.data_in_code_entry), &self.contents[dice.dataoff]), - )[0..ndice]; + return @ptrCast([*]align(1) const macho.data_in_code_entry, self.contents.ptr + dice.dataoff)[0..ndice]; }, else => {}, } @@ -632,11 +650,23 @@ pub fn getSectionAliasSymbolPtr(self: *Object, sect_id: u8) *macho.nlist_64 { return &self.symtab[self.getSectionAliasSymbolIndex(sect_id)]; } -pub fn getRelocs(self: Object, sect: macho.section_64) []align(1) const macho.relocation_info { - if (sect.nreloc == 0) return &[0]macho.relocation_info{}; +fn getSourceRelocs(self: Object, sect: macho.section_64) ?[]align(1) const macho.relocation_info { + if (sect.nreloc == 0) return null; return @ptrCast([*]align(1) const macho.relocation_info, self.contents.ptr + sect.reloff)[0..sect.nreloc]; } +pub fn getRelocs(self: Object, sect_id: u16) []const macho.relocation_info { + const sect = self.getSourceSection(sect_id); + const start = self.sect_relocs_lookup.items[sect_id]; + const len = sect.nreloc; + return self.relocs.items[start..][0..len]; +} + +fn relocGreaterThan(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool { + _ = ctx; + return lhs.r_address > rhs.r_address; +} + pub fn getSymbolName(self: Object, index: u32) []const u8 { const strtab = self.in_strtab.?; const sym = self.symtab[index]; diff --git a/src/link/MachO/ZldAtom.zig b/src/link/MachO/ZldAtom.zig index 817aa816625..b42309598d7 100644 --- a/src/link/MachO/ZldAtom.zig +++ b/src/link/MachO/ZldAtom.zig @@ -465,7 +465,7 @@ pub fn resolveRelocs( zld: *Zld, atom_index: AtomIndex, atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, + atom_relocs: []const macho.relocation_info, reverse_lookup: []u32, ) !void { const arch = zld.options.target.cpu.arch; @@ -540,7 +540,7 @@ fn resolveRelocsArm64( zld: *Zld, atom_index: AtomIndex, atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, + atom_relocs: []const macho.relocation_info, reverse_lookup: []u32, context: RelocContext, ) !void { @@ -579,7 +579,6 @@ fn resolveRelocsArm64( } const target = parseRelocTarget(zld, atom_index, rel, reverse_lookup); - const rel_offset = @intCast(u32, rel.r_address - context.base_offset); log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ @tagName(rel_type), @@ -589,6 +588,7 @@ fn resolveRelocsArm64( target.file, }); + const rel_offset = @intCast(u32, rel.r_address - context.base_offset); const source_addr = blk: { const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); break :blk source_sym.n_value + rel_offset; @@ -596,7 +596,7 @@ fn resolveRelocsArm64( const is_tlv = is_tlv: { const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); const header = zld.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; + break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; }; const target_addr = try getRelocTargetAddress(zld, rel, target, is_tlv); @@ -831,7 +831,7 @@ fn resolveRelocsX86( zld: *Zld, atom_index: AtomIndex, atom_code: []u8, - atom_relocs: []align(1) const macho.relocation_info, + atom_relocs: []const macho.relocation_info, reverse_lookup: []u32, context: RelocContext, ) !void { @@ -877,7 +877,7 @@ fn resolveRelocsX86( const is_tlv = is_tlv: { const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); const header = zld.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; + break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES; }; log.debug(" | source_addr = 0x{x}", .{source_addr}); @@ -1015,27 +1015,24 @@ pub fn getAtomCode(zld: *Zld, atom_index: AtomIndex) []const u8 { return code[offset..][0..code_len]; } -pub fn getAtomRelocs(zld: *Zld, atom_index: AtomIndex) []align(1) const macho.relocation_info { +pub fn getAtomRelocs(zld: *Zld, atom_index: AtomIndex) []const macho.relocation_info { const atom = zld.getAtomPtr(atom_index); assert(atom.getFile() != null); // Synthetic atom shouldn't need to unique for relocs. const object = zld.objects.items[atom.getFile().?]; - const source_sect = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - assert(!source_sect.isZerofill()); - break :blk source_sect; + const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { + break :blk source_sym.n_sect - 1; } else blk: { // If there was no matching symbol present in the source symtab, this means // we are dealing with either an entire section, or part of it, but also // starting at the beginning. const nbase = @intCast(u32, object.in_symtab.?.len); const sect_id = @intCast(u16, atom.sym_index - nbase); - const source_sect = object.getSourceSection(sect_id); - assert(!source_sect.isZerofill()); - break :blk source_sect; + break :blk sect_id; }; - - const relocs = object.getRelocs(source_sect); + const source_sect = object.getSourceSection(source_sect_id); + assert(!source_sect.isZerofill()); + const relocs = object.getRelocs(source_sect_id); if (atom.cached_relocs_start == -1) { const indexes = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 3a2ea79c6ec..cee3f302c08 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -396,7 +396,7 @@ pub const Zld = struct { break :blk null; } - switch (sect.@"type"()) { + switch (sect.type()) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS, @@ -1701,7 +1701,7 @@ pub const Zld = struct { break :outer; } } - switch (header.@"type"()) { + switch (header.type()) { macho.S_NON_LAZY_SYMBOL_POINTERS => { try self.writeGotPointer(count, buffer.writer()); }, @@ -1718,7 +1718,7 @@ pub const Zld = struct { break :outer; } } - if (header.@"type"() == macho.S_SYMBOL_STUBS) { + if (header.type() == macho.S_SYMBOL_STUBS) { try self.writeStubCode(atom_index, count, buffer.writer()); } else if (mem.eql(u8, header.sectName(), "__stub_helper")) { try self.writeStubHelperCode(atom_index, buffer.writer()); @@ -1802,7 +1802,7 @@ pub const Zld = struct { for (slice.items(.header)) |*header, sect_id| { if (header.size == 0) continue; if (self.requiresThunks()) { - if (header.isCode() and !(header.@"type"() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue; + if (header.isCode() and !(header.type() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue; } var atom_index = slice.items(.first_atom_index)[sect_id]; @@ -1830,7 +1830,7 @@ pub const Zld = struct { if (self.requiresThunks()) { for (slice.items(.header)) |header, sect_id| { if (!header.isCode()) continue; - if (header.@"type"() == macho.S_SYMBOL_STUBS) continue; + if (header.type() == macho.S_SYMBOL_STUBS) continue; if (mem.eql(u8, header.sectName(), "__stub_helper")) continue; // Create jump/branch range extenders if needed. @@ -1994,10 +1994,10 @@ pub const Zld = struct { const section_precedence: u4 = blk: { if (header.isCode()) { if (mem.eql(u8, "__text", header.sectName())) break :blk 0x0; - if (header.@"type"() == macho.S_SYMBOL_STUBS) break :blk 0x1; + if (header.type() == macho.S_SYMBOL_STUBS) break :blk 0x1; break :blk 0x2; } - switch (header.@"type"()) { + switch (header.type()) { macho.S_NON_LAZY_SYMBOL_POINTERS, macho.S_LAZY_SYMBOL_POINTERS, => break :blk 0x0, @@ -2121,7 +2121,7 @@ pub const Zld = struct { // Finally, unpack the rest. for (slice.items(.header)) |header, sect_id| { - switch (header.@"type"()) { + switch (header.type()) { macho.S_LITERAL_POINTERS, macho.S_REGULAR, macho.S_MOD_INIT_FUNC_POINTERS, @@ -2252,7 +2252,7 @@ pub const Zld = struct { // Finally, unpack the rest. const slice = self.sections.slice(); for (slice.items(.header)) |header, sect_id| { - switch (header.@"type"()) { + switch (header.type()) { macho.S_LITERAL_POINTERS, macho.S_REGULAR, macho.S_MOD_INIT_FUNC_POINTERS, @@ -2707,10 +2707,10 @@ pub const Zld = struct { } fn filterDataInCode( - dices: []const macho.data_in_code_entry, + dices: []align(1) const macho.data_in_code_entry, start_addr: u64, end_addr: u64, - ) []const macho.data_in_code_entry { + ) []align(1) const macho.data_in_code_entry { const Predicate = struct { addr: u64,