From 982fc4b8167b30712f6357b827fe2deb9d89f412 Mon Sep 17 00:00:00 2001 From: "Arina.Naumova" Date: Mon, 23 Dec 2024 05:24:31 -0500 Subject: [PATCH] Added placement of merged section data indexes inside .adlt section. Added printing of information about the merged section to the output of the --adlt-trace linker option. Change-Id: I4342864c54118e4063300f9438e6b32792dfc229 --- lld/ELF/Adlt/Context.cpp | 19 ++++ lld/ELF/Adlt/Context.h | 15 +++ lld/ELF/Adlt/InputFiles.cpp | 2 + lld/ELF/Adlt/SyntheticSections.cpp | 98 +++++++++++++++++++- lld/ELF/Adlt/Writer.cpp | 71 +++++++++++++- lld/ELF/Adlt/Writer.h | 1 + lld/ELF/Writer.cpp | 6 +- llvm/include/llvm/BinaryFormat/ADLTSection.h | 23 +++-- 8 files changed, 217 insertions(+), 18 deletions(-) diff --git a/lld/ELF/Adlt/Context.cpp b/lld/ELF/Adlt/Context.cpp index 379e6105c639..33b28a92e245 100644 --- a/lld/ELF/Adlt/Context.cpp +++ b/lld/ELF/Adlt/Context.cpp @@ -108,6 +108,8 @@ struct lld::elf::adlt::ContextImpl { void copyRequiredLocalSymbols(); + void prepareSoToMergedSecMap(); + void generateRelocIndexes(); llvm::UniqueStringSaver uniqueSaver{lld::bAlloc()}; @@ -317,6 +319,22 @@ void Impl::copyRequiredLocalSymbols() { } } +void Impl::prepareSoToMergedSecMap() { + ctx->soToMergedSecMap.clear(); // a repeated call must not duplicate entries + for (SharedFile *file : sharedFiles) + for (InputSectionBase *sharedInputSec : file->getSections()) + for (const auto &it : ctx->mergedSections) { + const auto oSec = std::get<0>(it); // merged output section + const auto pHdr = std::get<1>(it); // PHdr it was tracked for + for (SectionCommand *cmd : oSec->commands) + if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) // skip others + for (auto mergedInputSec : isd->sections) + if (sharedInputSec == mergedInputSec) + ctx->soToMergedSecMap[file].push_back( + {sharedInputSec, oSec, pHdr}); + } +} + Context::Context() : adltInfo(std::make_unique()), impl(std::make_unique()) {} @@ -393,6 +411,7 @@ void Context::generateDynamicTags(elf::Partition &part, void Context::generateRelocIndexes() { impl->generateRelocIndexes(); } void Context::copyRequiredLocalSymbols() { impl->copyRequiredLocalSymbols(); } +void
Context::prepareSoToMergedSecMap() { impl->prepareSoToMergedSecMap(); } void Context::addRelaDynGotIndex(StringRef symName, uint64_t offsetInGot) { auto &lastRel = mainPart->relaDyn->relocs.back(); diff --git a/lld/ELF/Adlt/Context.h b/lld/ELF/Adlt/Context.h index 2b2457b5a39a..5fb0e8820ac7 100644 --- a/lld/ELF/Adlt/Context.h +++ b/lld/ELF/Adlt/Context.h @@ -24,6 +24,7 @@ namespace elf { class ELFFileBase; class InputSectionBase; class InputFile; +class OutputSection; class Symbol; struct PhdrEntry; struct Partition; @@ -97,6 +98,10 @@ public: bool shouldSymbolBeUnique(llvm::StringRef name) const; llvm::SetVector commonProgramHeaders; + // When a new PT_LOAD PHdr is added, we add here its output sections + // with more than one input section + llvm::SmallVector> + mergedSections; // .symtab's common end (after last) local symbol index llvm::Optional getSharedEndLocalSymbolIndex(); // .symtab's common end (after last) global symbol index @@ -124,6 +129,16 @@ public: void addRelrDynGotIndex(StringRef symName, uint64_t offsetInGot); void addRelaPltIndex(StringRef symName, uint64_t offsetInGotPlt); + // We use this mapping during adlt section write + // and during printing with --adlt-trace option + llvm::MapVector< + const SharedFile *, + std::vector>> + soToMergedSecMap; + + void prepareSoToMergedSecMap(); + private: std::unique_ptr impl; }; diff --git a/lld/ELF/Adlt/InputFiles.cpp b/lld/ELF/Adlt/InputFiles.cpp index 9d6b91aa5574..f62f3e76152c 100644 --- a/lld/ELF/Adlt/InputFiles.cpp +++ b/lld/ELF/Adlt/InputFiles.cpp @@ -251,6 +251,8 @@ InputSectionBase *Impl::createInputSection(uint32_t indx, init(sec); if (name == ".note.ohos.ident") // Optimization: sec->file = nullptr; // this section will be in a common segment. + if (name == ".note.gnu.build-id") // Optimization: + sec->file = nullptr; // this section will be in a common segment. 
return sec; } diff --git a/lld/ELF/Adlt/SyntheticSections.cpp b/lld/ELF/Adlt/SyntheticSections.cpp index bb36ccebe4ce..9888a845e022 100644 --- a/lld/ELF/Adlt/SyntheticSections.cpp +++ b/lld/ELF/Adlt/SyntheticSections.cpp @@ -48,12 +48,16 @@ static_assert(sizeof(adlt_blob_array_t) == 16, static_assert(sizeof(adltBlobStartMark) == 4, "0xad17 consist of 4 bytes"); -static_assert(sizeof(adlt_section_header_t) == 88, +static_assert(sizeof(adlt_section_header_t) == 96, "please update version if header has been changed"); -static_assert(sizeof(adlt_psod_t) == 184, +static_assert(sizeof(adlt_psod_t) == 200, "please udpate version if adlt_psod_t layout or content changed"); +static_assert( + sizeof(adlt_chunk_t) == 24, + "please udpate version if adlt_chunk_t layout or content changed"); + struct lld::elf::adlt::AdltSectionImpl { AdltSectionImpl(StringTableSection &strTabSec) : strTabSec(strTabSec) {} @@ -70,6 +74,19 @@ struct lld::elf::adlt::AdltSectionImpl { using DtNeededsVec = SmallVector; + // will be serialized to adlt_chunk_t + struct MergedSecData { + uint64_t dataOff; // section data offset within the segment + uint64_t dataSize; // section data size + uint64_t secIndx; // section index + uint64_t + segIndex; // segment index (or pointer to the according pHdr before + // calling removeEmptyPTLoad) to be replaced with correct + // segment index after removeEmptyPTLoad and before writing + }; + + using MergedSecsVec = SmallVector; + // will be serialized to adlt_psod_t struct SoData { SectionString soName; @@ -94,6 +111,7 @@ struct lld::elf::adlt::AdltSectionImpl { ArrayRef relrDynIndx; ArrayRef relaDynIndx; ArrayRef relaPltIndx; + MergedSecsVec mergedSecs; }; // will be used to form some header data @@ -117,6 +135,7 @@ struct lld::elf::adlt::AdltSectionImpl { Elf64_Xword calculateHash(StringRef str) const; CommonData makeCommonData(); + void makeSoDataMergedSecs(const SharedFile *, Impl::SoData &); SoData makeSoData(const SharedFile *); adlt_psod_t serialize(const 
SoData &) const; size_t estimateBlobSize() const; @@ -146,6 +165,8 @@ struct lld::elf::adlt::AdltSectionImpl { }; void Impl::finalizeContents() { + adlt::ctx->prepareSoToMergedSecMap(); + soInputs.clear(); soInputs.reserve(adlt::ctx->getSharedFiles().size()); for (SharedFile *file : adlt::ctx->getSharedFiles()) @@ -174,6 +195,7 @@ void Impl::finalizeContents() { // writeTo adlt_cross_section_ref_t{}, // .sharedEndGlobalSymbolIndex, filled in // writeTo + sizeof(adlt_chunk_t), // .schemaChunkSize }; buildSonameIndex(); @@ -245,6 +267,43 @@ Impl::CommonData Impl::makeCommonData() { }; } +void Impl::makeSoDataMergedSecs(const SharedFile *file, Impl::SoData &data) { + // TODO: add checks, e.g. if segIdx == 0, etc. + const auto mapIt = adlt::ctx->soToMergedSecMap.find(file); + if (mapIt == adlt::ctx->soToMergedSecMap.end()) + return; // no merged sections were recorded for this file + for (const auto &it : mapIt->second) { + const auto mergedISec = std::get<0>(it); + const auto oSec = std::get<1>(it); + const auto pHdr = std::get<2>(it); + uint64_t secSize = mergedISec->size; // section size inside segment + uint64_t secIdx = oSec->sectionIndex; // merged output section index + // Offset inside the segment: start of the output section plus the sizes + // of the input sections placed before mergedISec (padding is not counted). + uint64_t secOffset = oSec->addr - pHdr->p_vaddr; + bool found = false; + for (SectionCommand *oSecCmd : oSec->commands) { + // dyn_cast, not cast: skip commands that are not input descriptions + auto *oSecInpSecDesc = dyn_cast<InputSectionDescription>(oSecCmd); + if (found || !oSecInpSecDesc) + continue; // stop accumulating once the section has been located + // expect the sections to be laid out in the order of ->sections + for (auto &iSec : oSecInpSecDesc->sections) { + if ((found = (mergedISec == iSec))) + break; + secOffset += iSec->size; + } + } + + // The output segment index is only known after + // Writer::removeEmptyPTLoad has been called; + // for now we'll just remember the pointer to pHdr + uint64_t segIdx = reinterpret_cast<uint64_t>(pHdr); + + data.mergedSecs.push_back({secOffset, secSize, secIdx, segIdx}); + } +} + Impl::SoData Impl::makeSoData(const
SharedFile *file) { assert(file); SoData data = {}; @@ -276,6 +335,8 @@ Impl::SoData Impl::makeSoData(const SharedFile *file) { data.programHeaders = file->getProgramHeaders().getArrayRef(); } + makeSoDataMergedSecs(file, data); + return data; } @@ -324,7 +385,7 @@ adlt_psod_t Impl::serialize(const SoData &soData) const { adlt_secindex_t( soData.ehFrameHdrSecIndex.value_or(UINT32_MAX) ), // .ehFrameHdrSecIndex - // adlt_blob_u8_array_t {}, // TODO: the content from .note.gnu.build-id + adlt_blob_u8_array_t {}, // merged sections data offset and size in blob, filled in writeTo }; } @@ -339,6 +400,7 @@ size_t Impl::estimateBlobSize() const { blobSize += sizeof(uint32_t) * soData.relrDynIndx.size(); blobSize += sizeof(uint32_t) * soData.relaDynIndx.size(); blobSize += sizeof(uint32_t) * soData.relaPltIndx.size(); + blobSize += sizeof(adlt_chunk_t) * soData.mergedSecs.size(); }; return blobSize; @@ -460,6 +522,15 @@ void Impl::finalizeOnWrite(size_t idx, SoData &soData) { } } + // Set segIndexes (we do it here, because they may have been changed after + // Writer::removeEmptyPTLoad) + for (auto &it : llvm::enumerate(mainPart->phdrs)) + for (MergedSecData &secData : soData.mergedSecs) { + auto *tmp_ptr = reinterpret_cast(secData.segIndex); + if (tmp_ptr == it.value()) + secData.segIndex = it.index(); + } + assert(soData.phIndexes.size() <= soData.programHeadersAllocated); if (soData.phIndexes.size() != soData.programHeadersAllocated) { warn(Twine(".adlt section: overallocated ph-indexes for psod-") + @@ -517,6 +588,27 @@ void Impl::writeTo(uint8_t *buf) { psod.relaPltIndx = writeArray(blobBuf, blobOff, soData.relaPltIndx); blobOff += psod.relaPltIndx.size; + } + + // merged data + for (const auto &it : llvm::enumerate(soInputs)) { + const auto &soData = it.value(); + auto &psod = psods[it.index()]; + + llvm::SmallVector mergedSecsData; + for (const MergedSecData &secData : soData.mergedSecs) { + adlt_chunk_t tmp = {}; // zero-init so any padding bytes copied below are deterministic + tmp.offset = secData.dataOff; + tmp.size =
secData.dataSize; + tmp.secIndex = secData.secIndx; + tmp.phIndex = secData.segIndex; + uint8_t *pTmp = reinterpret_cast(&tmp); + for (size_t i = 0; i < sizeof(adlt_chunk_t); i++) + mergedSecsData.push_back(pTmp[i]); + } + + psod.chunkArray = writeArray(blobBuf, blobOff, mergedSecsData); + blobOff += psod.chunkArray.size; } // finalize header.blobSize diff --git a/lld/ELF/Adlt/Writer.cpp b/lld/ELF/Adlt/Writer.cpp index 8d07ae245d26..2eaf618cfc95 100644 --- a/lld/ELF/Adlt/Writer.cpp +++ b/lld/ELF/Adlt/Writer.cpp @@ -39,6 +39,7 @@ public: void printRelrTable(RelrBaseSection *relSec, kIndexes &indexes); void printRelaTable(RelocationBaseSection *relSec, kIndexes &indexes); void traceRelocs() const; + void traceMergedSecOff() const; void tracePhdrs() const; private: @@ -59,6 +60,7 @@ struct Checker { void checkRelocs() const; void trackPhdr(kOsec *sec, Phdr *phdr) const; + void trackMergedSec(kOsec *oSec, Phdr *pHdr) const; template void checkOutSections(RelSec *relSec, kIndexes &indexes, kSize orderIndx) const; @@ -115,9 +117,9 @@ void Writer::updateLoadSegments(SmallVector &phdrs) const { // TODO: Reduce alignment for segments with single section // TODO: Check the size of the sections or // memory size for the segment and add alignment if they overlap. - bool withOwner = p->firstSec->hasInputSections; - bool withText = p->firstSec->name.startswith(".text"); - bool isRelRo = checker.isRelroSection(*p->firstSec); + bool withOwner = p->firstSec ? p->firstSec->hasInputSections : false; + bool withText = p->firstSec ? p->firstSec->name.startswith(".text") : false; + bool isRelRo = p->firstSec ? 
checker.isRelroSection(*p->firstSec) : false; if (!elf::config->adltSimple && adlt::ctx->withCfi && withOwner && (withText || isRelRo)) { // check cfi.h: LIBRARY_ALIGNMENT and _BITS @@ -134,6 +136,7 @@ void Writer::updateLoadSegments(SmallVector &phdrs) const { void Writer::checkPhdrs() const { checker.checkPhdrs(); } void Writer::checkRelocs() const { checker.checkRelocs(); } void Writer::traceRelocs() const { tracer.traceRelocs(); } +void Writer::traceMergedSecOff() const { tracer.traceMergedSecOff(); } void Writer::tracePhdrs() const { tracer.tracePhdrs(); } bool Writer::isRelroSection(kOsec &sec) const { @@ -145,7 +148,12 @@ void Writer::trackNewPhdr(Phdr *p) const { return; // Track some PHdrs switch (p->p_type) { + case PT_NOTE: + checker.trackMergedSec(p->lastSec, p); + break; case PT_LOAD: + checker.trackMergedSec(p->lastSec, p); + LLVM_FALLTHROUGH; case PT_TLS: case PT_GNU_EH_FRAME: checker.trackPhdr(p->lastSec, p); @@ -328,6 +336,53 @@ void Tracer::traceRelocs() const { } } +void Tracer::traceMergedSecOff() const { + using elf::adlt::ctx; + using elf::adlt::SharedFile; + lld::outs() << "[ADLT]\n"; + lld::outs() << "Merged sections (" << ctx->mergedSections.size() << ")\n"; + + for (auto &fPair : ctx->soToMergedSecMap) { + lld::outs() << "From " << fPair.first->getName() << "\n"; + lld::outs() << "Name\t\t\tOffsetInSeg\tSecSize\tSecIdx\tSegIdx\n"; + for (auto &it : fPair.second) { + const auto mergedISec = std::get<0>(it); + const auto oSec = std::get<1>(it); + const auto pHdr = std::get<2>(it); + uint64_t secSize = mergedISec->size; // section size inside segment + uint64_t secIdx = oSec->sectionIndex; // merged output section index + // find section data offset inside segment + uint64_t secOffset = oSec->addr - pHdr->p_vaddr; + bool found = false; + for (SectionCommand *oSecCmd : oSec->commands) { + // dyn_cast, not cast: skip commands that are not input descriptions + auto *outSecInpSecDesc = dyn_cast<InputSectionDescription>(oSecCmd); + if (found || !outSecInpSecDesc) + continue; // stop accumulating once the section has been located + // expect the sections to be added to the segment in the same order as + // they are added to
outSecInpSecDesc->sections. + // NOTE(review): padding between input sections is not counted here. + for (auto &iSec : outSecInpSecDesc->sections) { + if ((found = (mergedISec == iSec))) + break; + secOffset += iSec->size; + } + } + // Find the output segment index - it can be set after + // Writer::removeEmptyPTLoad has been called + uint64_t segIdx = 0; + for (auto &it : llvm::enumerate(mainPart->phdrs)) + if (pHdr == it.value()) { + segIdx = it.index(); + } + + lld::outs() << oSec->name << "\t\t" << utohexstr(secOffset) << "\t" + << utohexstr(secSize) << "\t" << secIdx << "\t" << segIdx + << "\n"; + } + } +} + void Tracer::tracePhdrs() const { using elf::adlt::ctx; using elf::adlt::SharedFile; @@ -435,6 +490,16 @@ void Checker::trackPhdr(kOsec *sec, Phdr *phdr) const { file->setTlsPhdr(phdr); } +void Checker::trackMergedSec(kOsec *oSec, Phdr *pHdr) const { + if (!oSec) + return; // a PHdr without sections has nothing to track + for (SectionCommand *cmd : oSec->commands) + if (auto *osd = dyn_cast<InputSectionDescription>(cmd)) + // track if output section has more than one input; record it once + if (osd->sections.size() > 1) + return elf::adlt::ctx->mergedSections.push_back({oSec, pHdr}); +} + bool Checker::isRelroSection(kOsec &sec) const { if (!config->zRelro) return false; diff --git a/lld/ELF/Adlt/Writer.h b/lld/ELF/Adlt/Writer.h index 17872c0f861c..76d7db5c2142 100644 --- a/lld/ELF/Adlt/Writer.h +++ b/lld/ELF/Adlt/Writer.h @@ -44,6 +44,7 @@ struct Writer { void checkRelocs() const; void traceRelocs() const; + void traceMergedSecOff() const; void tracePhdrs() const; bool needsPhdr(kOsec *sec, kU64 flags, kPhdr *load, kSize loadPos) const; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index e407d1ba2d42..8b4534b7913f 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -638,6 +638,7 @@ template void Writer::run() { writer->checkRelocs(); if (config->adltTrace) {
writer->tracePhdrs(); + writer->traceMergedSecOff(); writer->traceRelocs(); } } @@ -2781,11 +2782,6 @@ template void Writer::assignFileOffsets() { if (config->zSeparate != SeparateSegmentKind::None && lastRX && lastRX->lastSec == sec) off = alignToPowerOf2(off, config->maxPageSize); - bool debug = false; - if (debug) - lld::outs() << "assignFileOffsets() Sec: " << sec->name - << " Off: " << std::to_string(off) - << " Offset: " << std::to_string(sec->offset) << '\n'; } for (OutputSection *osec : outputSections) if (!(osec->flags & SHF_ALLOC)) { diff --git a/llvm/include/llvm/BinaryFormat/ADLTSection.h b/llvm/include/llvm/BinaryFormat/ADLTSection.h index a60878aa68bb..cf0b8a4ff364 100644 --- a/llvm/include/llvm/BinaryFormat/ADLTSection.h +++ b/llvm/include/llvm/BinaryFormat/ADLTSection.h @@ -90,14 +90,22 @@ typedef enum { typedef uint8_t adlt_hash_type_t; +// Data chunk descriptor (e.g. for merged sections) +typedef struct { + Elf64_Off offset; // data offset within the segment + Elf64_Xword size; // data size + adlt_secindex_t secIndex; // section index + adlt_phindex_t phIndex; // segment index +} adlt_chunk_t; + // Serializable representation per-shared-object-data in .adlt section typedef struct { - Elf64_Off soName; // offset in .adlt.strtab - Elf64_Xword soNameHash; // algorithm according to - // hdr.stringHashType value + Elf64_Off soName; // offset in .adlt.strtab + Elf64_Xword soNameHash; // algorithm according to + // hdr.stringHashType value adlt_cross_section_array_t initArray; adlt_cross_section_array_t finiArray; - adlt_blob_array_t dtNeeded; // array of adlt_dt_needed_index_t[] elems + adlt_blob_array_t dtNeeded; // array of adlt_dt_needed_index_t[] elems adlt_cross_section_ref_t sharedLocalSymbolIndex; adlt_cross_section_ref_t sharedGlobalSymbolIndex; adlt_blob_u16_array_t phIndexes; // program header indexes, typeof(e_phnum) @@ -105,8 +113,8 @@ typedef struct { adlt_blob_rel_array_t relaPltIndx; // .rela.plt dependent indexes, raw list 
adlt_blob_rel_array_t relrDynIndx; // .relr.dyn dependent indexes, raw list adlt_secindex_t ehFrameHdrSecIndex; // .eh_frame_hdr index - // adlt_blob_u8_array_t gnuBuildId; // TODO: the content from - // .note.gnu.build-id + adlt_blob_array_t + chunkArray; // chunk array (for merged sections); offset and size in blob } adlt_psod_t; typedef struct { @@ -125,11 +133,12 @@ typedef struct { adlt_cross_section_ref_t sharedEndGlobalSymbolIndex; // .symtab's end (after last) global symbol // index + Elf64_Half schemaChunkSize; // >= sizeof(adlt_chunk_t) if compatible } adlt_section_header_t; static const char adltBlobStartMark[4] = {0xA, 0xD, 0x1, 0x7}; -static const adlt_semver_t adltSchemaVersion = {1, 4, 1}; +static const adlt_semver_t adltSchemaVersion = {1, 5, 0}; #ifdef __cplusplus } // namespace adlt -- Gitee