From 65a3d38a2a518a8d47eabb1c58552d159af54daf Mon Sep 17 00:00:00 2001 From: invertego Date: Mon, 13 Mar 2023 08:30:14 -0700 Subject: [PATCH] build: enable clang ThinLTO With ThinLTO and an incremental LTO cache, incremental link time (with LTO, of course) goes down from tens of seconds to less than a second. For whatever reason, ThinLTO needs libco's co_swap_function to have external linkage. Who am I to argue? --- libco/aarch64.c | 2 +- libco/amd64.c | 4 ++-- libco/x86.c | 2 +- nall/GNUmakefile | 16 +++++++++++++--- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/libco/aarch64.c b/libco/aarch64.c index 56de22b3b..3764954bd 100644 --- a/libco/aarch64.c +++ b/libco/aarch64.c @@ -19,7 +19,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; #else section(text) #endif -static const uint32_t co_swap_function[1024] = { +const uint32_t co_swap_function[1024] = { 0x910003f0, /* mov x16,sp */ 0xa9007830, /* stp x16,x30,[x1] */ 0xa9407810, /* ldp x16,x30,[x0] */ diff --git a/libco/amd64.c b/libco/amd64.c index 8d3aec55f..8e4c4e343 100644 --- a/libco/amd64.c +++ b/libco/amd64.c @@ -20,7 +20,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; #endif #ifdef _WIN32 /* ABI: Win64 */ - static const unsigned char co_swap_function[4096] = { + const unsigned char co_swap_function[4096] = { 0x48, 0x89, 0x22, /* mov [rdx],rsp */ 0x48, 0x8b, 0x21, /* mov rsp,[rcx] */ 0x58, /* pop rax */ @@ -87,7 +87,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0; } #else /* ABI: SystemV */ - static const unsigned char co_swap_function[4096] = { + const unsigned char co_swap_function[4096] = { 0x48, 0x89, 0x26, /* mov [rsi],rsp */ 0x48, 0x8b, 0x27, /* mov rsp,[rdi] */ 0x58, /* pop rax */ diff --git a/libco/x86.c b/libco/x86.c index c539a2996..c0b20dbaa 100644 --- a/libco/x86.c +++ b/libco/x86.c @@ -27,7 +27,7 @@ static void (fastcall *co_swap)(cothread_t, cothread_t) = 0; section(text) #endif /* ABI: fastcall */ -static const unsigned char co_swap_function[4096] = { +const unsigned char co_swap_function[4096] = { 0x89, 0x22, /* mov [edx],esp */ 0x8b, 0x21, /* mov esp,[ecx] */ 0x58, /* pop eax */ diff --git a/nall/GNUmakefile b/nall/GNUmakefile index 060726150..0342ac804 100644 --- a/nall/GNUmakefile +++ b/nall/GNUmakefile @@ -200,14 +200,24 @@ endif # link-time optimization ifeq ($(lto),true) ifeq ($(cl),true) - flags += $(if $(findstring clang,$(compiler)),-flto,-GL) - options += -ltcg:incremental -ltcgout:$(object.path)/$(name).iobj + ifneq ($(findstring clang,$(compiler)),clang) + flags += -GL + options += -ltcg:incremental -ltcgout:$(object.path)/$(name).iobj + else + flags += -flto=thin + options += -lldltocache:$(object.path)/lto + endif else ifneq ($(findstring clang++,$(compiler)),clang++) flags += -flto=auto -fno-fat-lto-objects else - flags += -flto + flags += -flto=thin options += -flto=thin + ifneq ($(platform),macos) + options += -Wl,--thinlto-cache-dir=$(object.path)/lto + else + options += -Wl,-cache_path_lto,$(object.path)/lto + endif endif endif endif