From 0a76c7e8c11ac0d06327eca3fd0ad119ab030835 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Fri, 23 Mar 2018 12:16:48 -0700 Subject: [PATCH 01/24] Initial Large Page (2M) support for node.js --- ld.script | 233 +++++++++++++++++++++++++++++++++++++++++ node.gyp | 1 + node.gypi | 1 + src/node.cc | 10 +- src/node_large_page.cc | 220 ++++++++++++++++++++++++++++++++++++++ src/node_large_page.h | 11 ++ 6 files changed, 475 insertions(+), 1 deletion(-) create mode 100644 ld.script create mode 100644 src/node_large_page.cc create mode 100644 src/node_large_page.h diff --git a/ld.script b/ld.script new file mode 100644 index 00000000000000..a75325802a2438 --- /dev/null +++ b/ld.script @@ -0,0 +1,233 @@ +/* Script for -z combreloc: combine and sort reloc sections */ +/* Copyright (C) 2014-2015 Free Software Foundation, Inc. + Copying and distribution of this script, with or without modification, + are permitted in any medium without royalty provided the copyright + notice and this notice are preserved. */ +OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", + "elf64-x86-64") +OUTPUT_ARCH(i386:x86-64) +ENTRY(_start) +SEARCH_DIR("=/usr/local/lib/x86_64-linux-gnu"); SEARCH_DIR("=/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib"); SEARCH_DIR("=/usr/x86_64-linux-gnu/lib64"); SEARCH_DIR("=/usr/x86_64-linux-gnu/lib"); +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . 
= SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS; + .interp : { *(.interp) } + .note.gnu.build-id : { *(.note.gnu.build-id) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .rela.dyn : + { + *(.rela.init) + *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) + *(.rela.fini) + *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) + *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) + *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) + *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) + *(.rela.ctors) + *(.rela.dtors) + *(.rela.got) + *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) + *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) + *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) + *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) + *(.rela.ifunc) + } + .rela.plt : + { + *(.rela.plt) + PROVIDE_HIDDEN (__rela_iplt_start = .); + *(.rela.iplt) + PROVIDE_HIDDEN (__rela_iplt_end = .); + } + .init : + { + KEEP (*(SORT_NONE(.init))) + } + .plt : { *(.plt) *(.iplt) } +.plt.got : { *(.plt.got) } +.plt.bnd : { *(.plt.bnd) } + PROVIDE (__nodetext = .); + PROVIDE (_nodetext = .); + PROVIDE (nodetext = .); + .text : + { + *(.text.unlikely .text.*_unlikely .text.unlikely.*) + *(.text.exit .text.exit.*) + *(.text.startup .text.startup.*) + *(.text.hot .text.hot.*) + *(.text .stub .text.* .gnu.linkonce.t.*) + /* .gnu.warning sections are handled specially by elf32.em. 
*/ + *(.gnu.warning) + } + .fini : + { + KEEP (*(SORT_NONE(.fini))) + } + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) } + .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) } + .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table + .gcc_except_table.*) } + .gnu_extab : ONLY_IF_RO { *(.gnu_extab*) } + /* These sections are generated by the Sun/Oracle C++ compiler. */ + .exception_ranges : ONLY_IF_RO { *(.exception_ranges + .exception_ranges*) } + /* Adjust the address for the data segment. We want to adjust up to + the same address within the page on the next page up. */ + . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE)); + /* Exception handling */ + .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) } + .gnu_extab : ONLY_IF_RW { *(.gnu_extab) } + .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) } + .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) } + /* Thread Local Storage sections */ + .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } + .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } + .preinit_array : + { + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + } + .init_array : + { + PROVIDE_HIDDEN (__init_array_start = .); + KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) + KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors)) + PROVIDE_HIDDEN (__init_array_end = .); + } + .fini_array : + { + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) + KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors)) + PROVIDE_HIDDEN (__fini_array_end = .); + } 
+ .ctors : + { + /* gcc uses crtbegin.o to find the start of + the constructors, so we make sure it is + first. Because this is a wildcard, it + doesn't matter if the user does not + actually link against crtbegin.o; the + linker won't look for a file to match a + wildcard. The wildcard also means that it + doesn't matter which directory crtbegin.o + is in. */ + KEEP (*crtbegin.o(.ctors)) + KEEP (*crtbegin?.o(.ctors)) + /* We don't want to include the .ctor section from + the crtend.o file until after the sorted ctors. + The .ctor section from the crtend file contains the + end of ctors marker and it must be last */ + KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + } + .dtors : + { + KEEP (*crtbegin.o(.dtors)) + KEEP (*crtbegin?.o(.dtors)) + KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + } + .jcr : { KEEP (*(.jcr)) } + .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) } + .dynamic : { *(.dynamic) } + .got : { *(.got) *(.igot) } + . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .); + .got.plt : { *(.got.plt) *(.igot.plt) } + .data : + { + *(.data .data.* .gnu.linkonce.d.*) + SORT(CONSTRUCTORS) + } + .data1 : { *(.data1) } + _edata = .; PROVIDE (edata = .); + . = .; + __bss_start = .; + .bss : + { + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + *(COMMON) + /* Align here to ensure that the .bss section occupies space up to + _end. Align after .bss to ensure correct alignment even if the + .bss section disappears because there are no input sections. + FIXME: Why do we need it? When there is no .bss section, we don't + pad the .data section. */ + . = ALIGN(. != 0 ? 64 / 8 : 1); + } + .lbss : + { + *(.dynlbss) + *(.lbss .lbss.* .gnu.linkonce.lb.*) + *(LARGE_COMMON) + } + . = ALIGN(64 / 8); + . 
= SEGMENT_START("ldata-segment", .); + .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : + { + *(.lrodata .lrodata.* .gnu.linkonce.lr.*) + } + .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : + { + *(.ldata .ldata.* .gnu.linkonce.l.*) + . = ALIGN(. != 0 ? 64 / 8 : 1); + } + . = ALIGN(64 / 8); + _end = .; PROVIDE (end = .); + . = DATA_SEGMENT_END (.); + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF Extension. 
*/ + .debug_macro 0 : { *(.debug_macro) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) } +} diff --git a/node.gyp b/node.gyp index cbf0542965ca83..7465bbb1cd1e8c 100644 --- a/node.gyp +++ b/node.gyp @@ -302,6 +302,7 @@ 'src/js_stream.cc', 'src/module_wrap.cc', 'src/node.cc', + 'src/node_large_page.cc', 'src/node_api.cc', 'src/node_api.h', 'src/node_api_types.h', diff --git a/node.gypi b/node.gypi index 852cc18eee7495..5c88fd8e855d77 100644 --- a/node.gypi +++ b/node.gypi @@ -163,6 +163,7 @@ 'conditions': [ ['OS!="aix" and node_shared=="false"', { 'ldflags': [ + '-Wl,-T <(PRODUCT_DIR)/../../ld.script', '-Wl,--whole-archive,<(obj_dir)/deps/uv/<(STATIC_LIB_PREFIX)' 'uv<(STATIC_LIB_SUFFIX)', '-Wl,--no-whole-archive', diff --git a/src/node.cc b/src/node.cc index d3749b9cc12937..bf5d53fc64c0e4 100644 --- a/src/node.cc +++ b/src/node.cc @@ -69,6 +69,7 @@ #ifdef NODE_ENABLE_VTUNE_PROFILING #include "../deps/v8/src/third_party/vtune/v8-vtune.h" #endif +#include "node_large_page.h" #include #include // _O_RDWR @@ -4423,7 +4424,7 @@ inline int Start(Isolate* isolate, IsolateData* isolate_data, return exit_code; } - + inline int Start(uv_loop_t* event_loop, int argc, const char* const* argv, int exec_argc, const char* const* exec_argv) { @@ -4434,6 +4435,7 @@ inline int Start(uv_loop_t* event_loop, params.code_event_handler = vTune::GetVtuneCodeEventHandler(); #endif + Isolate* const isolate = Isolate::New(params); if (isolate == nullptr) return 12; // Signal internal error. @@ -4483,6 +4485,11 @@ int Start(int argc, char** argv) { CHECK_GT(argc, 0); + //#ifdef NODE_ENABLE_LARGE_CODE_PAGES + node::largepages::map_static_code_to_large_pages(); + //#endif + + // Hack around with the argv pointer. Used for process.title = "blah". argv = uv_setup_args(argc, argv); @@ -4534,6 +4541,7 @@ int Start(int argc, char** argv) { // will never be fully cleaned up. 
v8_platform.Dispose(); + delete[] exec_argv; exec_argv = nullptr; diff --git a/src/node_large_page.cc b/src/node_large_page.cc new file mode 100644 index 00000000000000..be55eae0a0c4e2 --- /dev/null +++ b/src/node_large_page.cc @@ -0,0 +1,220 @@ +#include +#include // _O_RDWR +#include // PATH_MAX +#include +#include +#include +#include +#include +#include +#include +#include + + +/* +The functions in this file map the text segment of node into 2M pages. +The algorithm is quite simple + 1. Find the text region of node in memory + 2. Move the text region to large pages +*/ + +extern char __executable_start; +extern char __etext; +extern char __nodetext; + +namespace node { + + namespace largepages { + +#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) +#define PAGE_ALIGN_UP(x,a) ALIGN(x,a) +#define PAGE_ALIGN_DOWN(x,a) ((x) & ~((a) - 1)) + + struct TextRegion { + void * from; + void * to; + long offset; + char name[PATH_MAX]; + }; + + + /* + Finding the text region. + 1. We read the maps file and find the start and end addresss of the loaded node process + 2. Within that start and end address is the .text region is what we are interested in. + 3. We modify the linker script to PROVIDE(__nodetext) which points to this region. + 4. _etext is the end of the .text segment. + 5. We return back a struct of the TextRegion + + The /proc//maps looks like this. The first entry is executable of the node process and it's address is from 00400000-020c7000 + 00400000-020c7000 r-xp 00000000 08:01 538609 /home/ssuresh/node/out/Release/node + 022c6000-022c7000 r--p 01cc6000 08:01 538609 /home/ssuresh/node/out/Release/node + 022c7000-022e1000 rw-p 01cc7000 08:01 538609 /home/ssuresh/node/out/Release/node + 022e1000-02412000 rw-p 00000000 00:00 0 [heap] + + If we look at the elf header (info file in gdb we see the .text.In the custom linker script we provide an entry to that. + + `/home/ssuresh/node/out/Release/node', file type elf64-x86-64. 
+ Entry point: 0x847e60 + 0x0000000000400270 - 0x000000000040028c is .interp + 0x000000000040028c - 0x00000000004002ac is .note.ABI-tag + 0x00000000004002ac - 0x00000000004002d0 is .note.gnu.build-id + 0x00000000004002d0 - 0x0000000000453ce0 is .gnu.hash + 0x0000000000453ce0 - 0x000000000055be18 is .dynsym + 0x000000000055be18 - 0x0000000000829208 is .dynstr + 0x0000000000829208 - 0x000000000083f222 is .gnu.version + 0x000000000083f228 - 0x000000000083f438 is .gnu.version_r + 0x000000000083f438 - 0x000000000083f660 is .rela.dyn + 0x000000000083f660 - 0x0000000000841d30 is .rela.plt + 0x0000000000841d30 - 0x0000000000841d4f is .init + 0x0000000000841d50 - 0x0000000000843740 is .plt + 0x0000000000843740 - 0x0000000000843748 is .plt.got + 0x0000000000843800 - 0x0000000001680519 is .text + 0x000000000168051c - 0x0000000001680525 is .fini + 0x0000000001681000 - 0x0000000001ed0778 is .rodata + 0x0000000001ed0778 - 0x0000000001ed0780 is .eh_frame_hdr + 0x0000000002000000 - 0x00000000023a97c4 is .eh_frame + 0x00000000025a9d50 - 0x00000000025a9d54 is .tbss + 0x00000000025a9d50 - 0x00000000025a9da0 is .init_array + 0x00000000025a9da0 - 0x00000000025a9db8 is .fini_array + 0x00000000025a9db8 - 0x00000000025a9dc0 is .jcr + 0x00000000025a9dc0 - 0x00000000025a9ff0 is .dynamic + 0x00000000025a9ff0 - 0x00000000025aa000 is .got + 0x00000000025aa000 - 0x00000000025aad08 is .got.plt + 0x00000000025aad20 - 0x00000000025c39f0 is .data + 0x00000000025c3a00 - 0x00000000025dbb28 is .bss + + */ + + static struct TextRegion find_node_text_region() { + FILE *f; + long unsigned int start, end, offset, inode; + char perm[5], dev[6], name[256]; + int ret; + const size_t hugePageSize = 2L * 1024 * 1024; + struct TextRegion nregion; + + + f = fopen("/proc/self/maps", "r"); + + ret = fscanf(f, "%lx-%lx %4s %lx %5s %ld %s\n", &start, &end, perm, &offset, dev, &inode, name); + if (ret == 7 && perm[0] == 'r' && perm[1] == '-' && perm[2] == 'x' + && name[0] == '/' && strstr(name, "node") != NULL) { + 
// Checking if the region is from node binary and executable + // 00400000-020c7000 r-xp 00000000 08:01 538609 /home/ssuresh/node/out/Release/node + + // Need to align the from and to to the 2M Boundary + fprintf(stderr,"exe start %p exe end %p \n", &__executable_start, &__etext); + + fprintf(stderr, "find_node_text_region %lx-%lx %s\n", start, end, name); + // start = (unsigned int long) &__executable_start; + // end = (unsigned int long) &__etext; + // start = 0x0842700; + // end = 0x1679e19; + start = (unsigned int long) &__nodetext; + end = (unsigned int long) &__etext; + char *from = (char *)PAGE_ALIGN_UP(start, hugePageSize); + char *to = (char *)PAGE_ALIGN_DOWN(end, hugePageSize); + fprintf(stderr, "find_node_text_region %lx-%lx %s\n", from, to, name); + nregion.from = from; + nregion.to = to; + nregion.offset = offset; + strcpy(nregion.name,name); + if (from > to) { + // Handle Error + return nregion; + } + return nregion; + } + else { + // Handle Error + return nregion; + } + + } + + /* + + Moving the text region to large pages. We need to be very careful. + a) This function itself should not be moved. + We use a gcc option to put it outside the .text area + b) This function should not call any functions that might be moved. + + 1. We map a new area and copy the original code there + 2. We mmap using HUGE_TLB + 3. If we are successful we copy the code there and unmap the original region. 
+ + */ + + void + __attribute__((__section__(".eh_frame"))) + __attribute__((__aligned__(2 * 1024 * 1024))) + __attribute__((__noinline__)) + __attribute__((__optimize__("2"))) + move_text_region_to_large_pages(struct TextRegion r) { + size_t size = (intptr_t)r.to - (intptr_t)r.from; + void *nmem, *ret; + void *start = r.from; + const size_t hugePageSize = 2L * 1024 * 1024; + const size_t fourkPageSize = 4L * 1024; + + fprintf(stderr,"exe start %p exe end %p %p \n", &__executable_start, &__etext, &move_text_region_to_large_pages); + + fprintf(stderr, "move_text_region_to_large_pages %p %p %d %d\n", start, r.to, size/hugePageSize, size/fourkPageSize); + // nmem = malloc(size); + nmem = mmap(NULL, size,PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,-1, 0); + memcpy(nmem, r.from, size); + + // if (nmem == MAP_FAILED) { + // fprintf(stderr, "mmap failed\n"); + // } + // memcpy(nmem, r.from, size); + // fprintf(stderr, "move_text_region_to_large_pages %p %lx %p\n", start, size, nmem); + + mmap(start, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB, -1 , 0); + // madvise(start, size, MADV_HUGEPAGE); + memcpy(start, nmem, size); + mprotect(start, size, PROT_READ | PROT_EXEC); + munmap(nmem, size); + // free(nmem); + + } + bool transHugePagesPresent=false; + bool explicitHugePagesPresent = false; + + /* + + You’ll see a list of all possible options ( always, madvise, never ), with + the currently active option being enclosed in brackets.madvise is the default. + This means transparent hugepages are only enabled for memory regions that + explicitly request hugepages using madvise(2). + + Applications that gets a lot of benefit from hugepages and that don't + risk to lose memory by using hugepages, should use + madvise(MADV_HUGEPAGE) on their critical mmapped regions. 
+ */ + static bool isTransparentHugePagesEnabled() { + return true; + } + static bool isExplicitHugePagesEnabled() { + return true; + } + + /* This is the primary interface that is exposed */ + + bool isLargePagesEnabled() { + return isExplicitHugePagesEnabled() || isTransparentHugePagesEnabled(); + } + + void map_static_code_to_large_pages() { + struct TextRegion n; + fprintf(stderr, "mapping static code to large pages\n"); + // starting and ending address of the region in the node process + n = find_node_text_region(); + fprintf(stderr, "n.from=%p n.to=%p n.name = %s map_static_code_to_large_pages = %p\n", n.from, n.to, n.name, &move_text_region_to_large_pages); + if (n.to <= (void *) & move_text_region_to_large_pages) + move_text_region_to_large_pages(n); + + } + } +} diff --git a/src/node_large_page.h b/src/node_large_page.h new file mode 100644 index 00000000000000..de194fa5a94517 --- /dev/null +++ b/src/node_large_page.h @@ -0,0 +1,11 @@ +#ifndef NODE_LARGEPAGE_H_ +#define NODE_LARGEPAGE_H_ + +namespace node { + namespace largepages { + void map_static_code_to_large_pages(); + } // namespace largepages +} // namespace node + +#endif // NODE_LARGE_PAGE_H_ + From fa0e3243c3bda61aa83686933daf0b78afc800ab Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Mon, 26 Mar 2018 12:10:09 -0700 Subject: [PATCH 02/24] Add support for checking if explicitHugePages and transparentHugePages are enabled --- src/node_large_page.cc | 75 +++++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/src/node_large_page.cc b/src/node_large_page.cc index be55eae0a0c4e2..ff37925e26142e 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -10,6 +10,9 @@ #include #include +#include +#include +#include /* The functions in this file map the text segment of node into 2M pages. 
@@ -104,9 +107,9 @@ namespace node { // 00400000-020c7000 r-xp 00000000 08:01 538609 /home/ssuresh/node/out/Release/node // Need to align the from and to to the 2M Boundary - fprintf(stderr,"exe start %p exe end %p \n", &__executable_start, &__etext); + // fprintf(stderr,"exe start %p exe end %p \n", &__executable_start, &__etext); - fprintf(stderr, "find_node_text_region %lx-%lx %s\n", start, end, name); + // fprintf(stderr, "find_node_text_region %lx-%lx %s\n", start, end, name); // start = (unsigned int long) &__executable_start; // end = (unsigned int long) &__etext; // start = 0x0842700; @@ -115,7 +118,7 @@ namespace node { end = (unsigned int long) &__etext; char *from = (char *)PAGE_ALIGN_UP(start, hugePageSize); char *to = (char *)PAGE_ALIGN_DOWN(end, hugePageSize); - fprintf(stderr, "find_node_text_region %lx-%lx %s\n", from, to, name); + // fprintf(stderr, "find_node_text_region %lx-%lx %s\n", from, to, name); nregion.from = from; nregion.to = to; nregion.offset = offset; @@ -158,9 +161,9 @@ namespace node { const size_t hugePageSize = 2L * 1024 * 1024; const size_t fourkPageSize = 4L * 1024; - fprintf(stderr,"exe start %p exe end %p %p \n", &__executable_start, &__etext, &move_text_region_to_large_pages); + // fprintf(stderr,"exe start %p exe end %p %p \n", &__executable_start, &__etext, &move_text_region_to_large_pages); - fprintf(stderr, "move_text_region_to_large_pages %p %p %d %d\n", start, r.to, size/hugePageSize, size/fourkPageSize); + // fprintf(stderr, "move_text_region_to_large_pages %p %p %d %d\n", start, r.to, size/hugePageSize, size/fourkPageSize); // nmem = malloc(size); nmem = mmap(NULL, size,PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,-1, 0); memcpy(nmem, r.from, size); @@ -170,9 +173,14 @@ namespace node { // } // memcpy(nmem, r.from, size); // fprintf(stderr, "move_text_region_to_large_pages %p %lx %p\n", start, size, nmem); - + +#if 0 // use for explicit huge pages mmap(start, size, PROT_READ | PROT_WRITE | PROT_EXEC, 
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB, -1 , 0); - // madvise(start, size, MADV_HUGEPAGE); +#endif +#if 1 // use for transparent huge pages + mmap(start, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0); + madvise(start, size, MADV_HUGEPAGE); +#endif memcpy(start, nmem, size); mprotect(start, size, PROT_READ | PROT_EXEC); munmap(nmem, size); @@ -181,7 +189,6 @@ namespace node { } bool transHugePagesPresent=false; bool explicitHugePagesPresent = false; - /* You’ll see a list of all possible options ( always, madvise, never ), with @@ -194,24 +201,68 @@ namespace node { madvise(MADV_HUGEPAGE) on their critical mmapped regions. */ static bool isTransparentHugePagesEnabled() { - return true; + + std::ifstream ifs; + ifs.open("/sys/kernel/mm/transparent_hugepage/enabled"); + std::string always, madvise, never; + if (ifs.is_open()) { + while(ifs >> always >> madvise >> never) ; + // std::cout << always << madvise << never; + } + if (always.compare("[always]") == 0) + return true; + if (madvise.compare("[madvise]") == 0) + return true; + + return false; } static bool isExplicitHugePagesEnabled() { - return true; + std::string kw; + std::ifstream file("/proc/meminfo"); + while(file >> kw) { + if(kw == "HugePages_Total:") { + unsigned long hp_tot; + file >> hp_tot; + if (hp_tot > 0) + return true; + else + return false; + } + } + return false; // HugePages not found + } + + static int howManyExplicitHugePagesFree() { + std::string kw; + std::ifstream file("/proc/meminfo"); + while(file >> kw) { + if(kw == "HugePages_Free:") { + unsigned long hp_free; + file >> hp_free; + return hp_free; + } + } + return 0; // HugePages not found + } /* This is the primary interface that is exposed */ bool isLargePagesEnabled() { + bool trans = isTransparentHugePagesEnabled(); + // fprintf(stderr, "Transparent Huge Pages = %s\n", trans ? 
"enabled" : "disabled"); + bool explict = isExplicitHugePagesEnabled(); + // fprintf(stderr, "Explicit Huge Pages = %s\n", explict ? "enabled" : "disabled"); + return isExplicitHugePagesEnabled() || isTransparentHugePagesEnabled(); } void map_static_code_to_large_pages() { struct TextRegion n; - fprintf(stderr, "mapping static code to large pages\n"); + // fprintf(stderr, "mapping static code to large pages\n"); // starting and ending address of the region in the node process n = find_node_text_region(); - fprintf(stderr, "n.from=%p n.to=%p n.name = %s map_static_code_to_large_pages = %p\n", n.from, n.to, n.name, &move_text_region_to_large_pages); + // fprintf(stderr, "n.from=%p n.to=%p n.name = %s map_static_code_to_large_pages = %p\n", n.from, n.to, n.name, &move_text_region_to_large_pages); if (n.to <= (void *) & move_text_region_to_large_pages) move_text_region_to_large_pages(n); From fdb45cd3fcc2989bec11fa2c024720a71f07205e Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Fri, 30 Mar 2018 10:02:34 -0700 Subject: [PATCH 03/24] Added License headers Increased number of Pages to be mapped --- src/node_large_page.cc | 29 +++++++++++++++++++++++++++-- src/node_large_page.h | 27 ++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/node_large_page.cc b/src/node_large_page.cc index ff37925e26142e..3516df81e302c3 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -1,3 +1,27 @@ +/* +Copyright (C) 2018 Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice 
shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +SPDX-License-Identifier: MIT +*/ + #include #include // _O_RDWR #include // PATH_MAX @@ -17,7 +41,7 @@ /* The functions in this file map the text segment of node into 2M pages. The algorithm is quite simple - 1. Find the text region of node in memory + 1. Find the text region of node binary in memory 2. Move the text region to large pages */ @@ -114,8 +138,9 @@ namespace node { // end = (unsigned int long) &__etext; // start = 0x0842700; // end = 0x1679e19; + // fprintf(stderr, "find_node_text_region start-end: %lx-%lx nodestart-nodeend %lx-%lx\n", start, end, (unsigned int long)&__nodetext, (unsigned int long) &__etext); start = (unsigned int long) &__nodetext; - end = (unsigned int long) &__etext; +// end = (unsigned int long) &__etext; char *from = (char *)PAGE_ALIGN_UP(start, hugePageSize); char *to = (char *)PAGE_ALIGN_DOWN(end, hugePageSize); // fprintf(stderr, "find_node_text_region %lx-%lx %s\n", from, to, name); diff --git a/src/node_large_page.h b/src/node_large_page.h index de194fa5a94517..d2cbafd51ca72b 100644 --- a/src/node_large_page.h +++ b/src/node_large_page.h @@ -1,9 +1,34 @@ +/* +Copyright (C) 2018 Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, 
sublicense, +and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +SPDX-License-Identifier: MIT +*/ + #ifndef NODE_LARGEPAGE_H_ #define NODE_LARGEPAGE_H_ namespace node { namespace largepages { - void map_static_code_to_large_pages(); + bool isLargePagesEnabled(); + void map_static_code_to_large_pages(); } // namespace largepages } // namespace node From b7791c2e5703e4a6eb50049a5e0b5d35dc18dd3c Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Fri, 30 Mar 2018 13:03:57 -0700 Subject: [PATCH 04/24] Get rid of warning messages & code cleanup Changes to be committed: modified: src/node_large_page.cc --- src/node_large_page.cc | 54 ++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/src/node_large_page.cc b/src/node_large_page.cc index 3516df81e302c3..67015f115a8a9b 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -61,8 +61,11 @@ namespace node { void * from; void * to; long offset; + int totalHugePages; char name[PATH_MAX]; }; + const size_t hugePageSize = 2L * 1024 * 1024; + const size_t fourkPageSize = 4L * 1024; /* @@ -129,24 +132,15 @@ namespace node { && name[0] == '/' && strstr(name, "node") != NULL) { // Checking if the region is from node binary and executable // 00400000-020c7000 r-xp 00000000 08:01 538609 
/home/ssuresh/node/out/Release/node - - // Need to align the from and to to the 2M Boundary - // fprintf(stderr,"exe start %p exe end %p \n", &__executable_start, &__etext); - - // fprintf(stderr, "find_node_text_region %lx-%lx %s\n", start, end, name); - // start = (unsigned int long) &__executable_start; - // end = (unsigned int long) &__etext; - // start = 0x0842700; - // end = 0x1679e19; - // fprintf(stderr, "find_node_text_region start-end: %lx-%lx nodestart-nodeend %lx-%lx\n", start, end, (unsigned int long)&__nodetext, (unsigned int long) &__etext); start = (unsigned int long) &__nodetext; // end = (unsigned int long) &__etext; char *from = (char *)PAGE_ALIGN_UP(start, hugePageSize); char *to = (char *)PAGE_ALIGN_DOWN(end, hugePageSize); - // fprintf(stderr, "find_node_text_region %lx-%lx %s\n", from, to, name); + size_t size = (intptr_t)to - (intptr_t)from; nregion.from = from; nregion.to = to; nregion.offset = offset; + nregion.totalHugePages = size/hugePageSize; strcpy(nregion.name,name); if (from > to) { // Handle Error @@ -181,24 +175,12 @@ namespace node { __attribute__((__optimize__("2"))) move_text_region_to_large_pages(struct TextRegion r) { size_t size = (intptr_t)r.to - (intptr_t)r.from; - void *nmem, *ret; + void *nmem; void *start = r.from; - const size_t hugePageSize = 2L * 1024 * 1024; - const size_t fourkPageSize = 4L * 1024; - - // fprintf(stderr,"exe start %p exe end %p %p \n", &__executable_start, &__etext, &move_text_region_to_large_pages); - // fprintf(stderr, "move_text_region_to_large_pages %p %p %d %d\n", start, r.to, size/hugePageSize, size/fourkPageSize); - // nmem = malloc(size); nmem = mmap(NULL, size,PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,-1, 0); memcpy(nmem, r.from, size); - // if (nmem == MAP_FAILED) { - // fprintf(stderr, "mmap failed\n"); - // } - // memcpy(nmem, r.from, size); - // fprintf(stderr, "move_text_region_to_large_pages %p %lx %p\n", start, size, nmem); - #if 0 // use for explicit huge pages 
mmap(start, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB, -1 , 0); #endif @@ -209,11 +191,12 @@ namespace node { memcpy(start, nmem, size); mprotect(start, size, PROT_READ | PROT_EXEC); munmap(nmem, size); - // free(nmem); } bool transHugePagesPresent=false; bool explicitHugePagesPresent = false; + + static bool isTransparentHugePagesEnabled() { /* You’ll see a list of all possible options ( always, madvise, never ), with @@ -225,14 +208,12 @@ namespace node { risk to lose memory by using hugepages, should use madvise(MADV_HUGEPAGE) on their critical mmapped regions. */ - static bool isTransparentHugePagesEnabled() { std::ifstream ifs; ifs.open("/sys/kernel/mm/transparent_hugepage/enabled"); std::string always, madvise, never; if (ifs.is_open()) { while(ifs >> always >> madvise >> never) ; - // std::cout << always << madvise << never; } if (always.compare("[always]") == 0) return true; @@ -270,24 +251,29 @@ namespace node { return 0; // HugePages not found } + + static void testHugePageSupport() { + bool trans = isTransparentHugePagesEnabled(); + fprintf(stderr, "Transparent Huge Pages = %s\n", trans ? "enabled" : "disabled"); + bool explict = isExplicitHugePagesEnabled(); + fprintf(stderr, "Explicit Huge Pages = %s\n", explict ? "enabled" : "disabled"); + if (explict) { + int hpfree = howManyExplicitHugePagesFree(); + fprintf(stderr, "Number of Explicit Huge Pages = %d\n", hpfree); + } + + } /* This is the primary interface that is exposed */ bool isLargePagesEnabled() { - bool trans = isTransparentHugePagesEnabled(); - // fprintf(stderr, "Transparent Huge Pages = %s\n", trans ? "enabled" : "disabled"); - bool explict = isExplicitHugePagesEnabled(); - // fprintf(stderr, "Explicit Huge Pages = %s\n", explict ? 
"enabled" : "disabled"); return isExplicitHugePagesEnabled() || isTransparentHugePagesEnabled(); } void map_static_code_to_large_pages() { struct TextRegion n; - // fprintf(stderr, "mapping static code to large pages\n"); - // starting and ending address of the region in the node process n = find_node_text_region(); - // fprintf(stderr, "n.from=%p n.to=%p n.name = %s map_static_code_to_large_pages = %p\n", n.from, n.to, n.name, &move_text_region_to_large_pages); if (n.to <= (void *) & move_text_region_to_large_pages) move_text_region_to_large_pages(n); From 6631eea053a042ff3a820896fe3e95b6450075b6 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Wed, 4 Apr 2018 12:07:30 -0700 Subject: [PATCH 05/24] Protect the large pages under #ifdef NODE_ENABLE_LARGE_CODE_PAGES and enable node_use_large_pages=true in configure and protect the node_large_page.cc and the link using node_use_large_pages --- configure | 3 +++ node.gyp | 14 +++++++++++++- node.gypi | 17 +++++++++++++---- src/node.cc | 9 ++++++--- 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/configure b/configure index 5d2e800015f72c..49b3a033afeac8 100755 --- a/configure +++ b/configure @@ -903,6 +903,9 @@ def configure_node(o): else: o['variables']['node_use_dtrace'] = 'false' + if flavor == 'linux': + o['variables']['node_use_large_pages'] = 'true' + if options.no_ifaddrs: o['defines'] += ['SUNOS_NO_IFADDRS'] diff --git a/node.gyp b/node.gyp index 7465bbb1cd1e8c..557ba7576d3a55 100644 --- a/node.gyp +++ b/node.gyp @@ -302,7 +302,6 @@ 'src/js_stream.cc', 'src/module_wrap.cc', 'src/node.cc', - 'src/node_large_page.cc', 'src/node_api.cc', 'src/node_api.h', 'src/node_api_types.h', @@ -581,6 +580,19 @@ 'src/tls_wrap.h' ], }], + [ 'node_use_large_pages=="true"', { + 'defines': [ 'NODE_ENABLE_LARGE_CODE_PAGES=1' ], + # The current implementation of Large Pages is under Linux. + # Other implementations are possible but not currently supported. 
+ # + 'conditions': [ + [ 'OS=="linux"', { + 'sources': [ + 'src/node_large_page.cc' + ], + }], + ] + } ], ], }, { diff --git a/node.gypi b/node.gypi index 5c88fd8e855d77..3df07cd921e8b5 100644 --- a/node.gypi +++ b/node.gypi @@ -161,14 +161,24 @@ }, }, 'conditions': [ - ['OS!="aix" and node_shared=="false"', { + ['OS!="aix" and node_shared=="false" and node_use_large_pages=="true"', { + 'ldflags': [ + '-Wl,-T <(PRODUCT_DIR)/../../ld.script', + '-Wl,--whole-archive,<(obj_dir)/deps/uv/<(STATIC_LIB_PREFIX)' + 'uv<(STATIC_LIB_SUFFIX)', + '-Wl,--no-whole-archive', + ] + }], + + ['OS!="aix" and node_shared=="false" and node_use_large_pages=="false"', { 'ldflags': [ - '-Wl,-T <(PRODUCT_DIR)/../../ld.script', '-Wl,--whole-archive,<(obj_dir)/deps/uv/<(STATIC_LIB_PREFIX)' 'uv<(STATIC_LIB_SUFFIX)', '-Wl,--no-whole-archive', ], + }], + ], }], ], @@ -319,6 +329,5 @@ }, { 'defines': [ 'HAVE_OPENSSL=0' ] }], - - ], + ] } diff --git a/src/node.cc b/src/node.cc index bf5d53fc64c0e4..90e54f05e1fc5c 100644 --- a/src/node.cc +++ b/src/node.cc @@ -4485,9 +4485,12 @@ int Start(int argc, char** argv) { CHECK_GT(argc, 0); - //#ifdef NODE_ENABLE_LARGE_CODE_PAGES - node::largepages::map_static_code_to_large_pages(); - //#endif +#ifdef NODE_ENABLE_LARGE_CODE_PAGES + if (node::largepages::isLargePagesEnabled()) { + // fprintf(stderr, "Mapping static code to large pages\n"); + node::largepages::map_static_code_to_large_pages(); + } +#endif // Hack around with the argv pointer. Used for process.title = "blah". 
From 843089c2faf8ebd389c69aa8c07d8e601d89c2b3 Mon Sep 17 00:00:00 2001 From: Uttam Pawar Date: Mon, 9 Apr 2018 14:54:42 -0700 Subject: [PATCH 06/24] Added configure option to enable huge pages --- configure | 6 ++++++ node.gyp | 6 +++--- node.gypi | 13 +++++-------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/configure b/configure index 49b3a033afeac8..750826ad23c9a9 100755 --- a/configure +++ b/configure @@ -388,6 +388,11 @@ parser.add_option('--with-etw', dest='with_etw', help='build with ETW (default is true on Windows)') +parser.add_option('--use-largepages', + action='store_true', + dest='node_use_large_pages', + help='build with Large Pages support (enabled only for Linux).') + intl_optgroup.add_option('--with-intl', action='store', dest='with_intl', @@ -904,6 +909,7 @@ def configure_node(o): o['variables']['node_use_dtrace'] = 'false' if flavor == 'linux': + print(' Large pages is ', options.node_use_large_pages); o['variables']['node_use_large_pages'] = 'true' if options.no_ifaddrs: diff --git a/node.gyp b/node.gyp index 557ba7576d3a55..69db47193ffdd1 100644 --- a/node.gyp +++ b/node.gyp @@ -580,15 +580,15 @@ 'src/tls_wrap.h' ], }], - [ 'node_use_large_pages=="true"', { + [ 'node_use_large_pages=="true"', { 'defines': [ 'NODE_ENABLE_LARGE_CODE_PAGES=1' ], # The current implementation of Large Pages is under Linux. - # Other implementations are possible but not currently supported. + # Other implementations are possible but not currently supported. 
# 'conditions': [ [ 'OS=="linux"', { 'sources': [ - 'src/node_large_page.cc' + 'src/node_large_page.cc' ], }], ] diff --git a/node.gypi b/node.gypi index 3df07cd921e8b5..30c8eea0b3b41f 100644 --- a/node.gypi +++ b/node.gypi @@ -161,24 +161,21 @@ }, }, 'conditions': [ - ['OS!="aix" and node_shared=="false" and node_use_large_pages=="true"', { - 'ldflags': [ + ['OS!="aix" and node_shared=="false" and node_use_large_pages=="true"', { + 'ldflags': [ '-Wl,-T <(PRODUCT_DIR)/../../ld.script', - '-Wl,--whole-archive,<(obj_dir)/deps/uv/<(STATIC_LIB_PREFIX)' + '-Wl,--whole-archive,<(obj_dir)/deps/uv/<(STATIC_LIB_PREFIX)' 'uv<(STATIC_LIB_SUFFIX)', '-Wl,--no-whole-archive', ] - }], - - ['OS!="aix" and node_shared=="false" and node_use_large_pages=="false"', { + }], + ['OS!="aix" and node_shared=="false" and node_use_large_pages=="false"', { 'ldflags': [ '-Wl,--whole-archive,<(obj_dir)/deps/uv/<(STATIC_LIB_PREFIX)' 'uv<(STATIC_LIB_SUFFIX)', '-Wl,--no-whole-archive', ], - }], - ], }], ], From 077bc0108bd4f4aaee5ec03a3f4d366d0e529020 Mon Sep 17 00:00:00 2001 From: Uttam Pawar Date: Mon, 9 Apr 2018 16:49:25 -0700 Subject: [PATCH 07/24] Added else clause to set node_use_large_pages=false by default --- configure | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/configure b/configure index 750826ad23c9a9..f06d0331c26291 100755 --- a/configure +++ b/configure @@ -909,9 +909,11 @@ def configure_node(o): o['variables']['node_use_dtrace'] = 'false' if flavor == 'linux': - print(' Large pages is ', options.node_use_large_pages); - o['variables']['node_use_large_pages'] = 'true' - + if options.node_use_large_pages: + o['variables']['node_use_large_pages'] = 'true' + else: + o['variables']['node_use_large_pages'] = 'false' + if options.no_ifaddrs: o['defines'] += ['SUNOS_NO_IFADDRS'] From b91de5ad2bbb93ea01b780bcc182b330c8f5ffd7 Mon Sep 17 00:00:00 2001 From: Uttam Pawar Date: Tue, 8 May 2018 12:36:28 -0700 Subject: [PATCH 08/24] Finished adding checks at 
appropriate places to handle possible error condition --- src/node_large_page.cc | 396 +++++++++++++++++++---------------------- 1 file changed, 185 insertions(+), 211 deletions(-) diff --git a/src/node_large_page.cc b/src/node_large_page.cc index 67015f115a8a9b..da96df8773f13f 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -1,26 +1,24 @@ -/* -Copyright (C) 2018 Intel Corporation - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, -and/or sell copies of the Software, and to permit persons to whom -the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES -OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -OR OTHER DEALINGS IN THE SOFTWARE. 
- -SPDX-License-Identifier: MIT -*/ +// Copyright (C) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +// +// SPDX-License-Identifier: MIT #include #include // _O_RDWR @@ -37,85 +35,36 @@ SPDX-License-Identifier: MIT #include #include #include - -/* -The functions in this file map the text segment of node into 2M pages. -The algorithm is quite simple - 1. Find the text region of node binary in memory - 2. Move the text region to large pages -*/ + +// The functions in this file map the text segment of node into 2M pages. +// The algorithm is quite simple +// 1. Find the text region of node binary in memory +// 2. 
Move the text region to large pages extern char __executable_start; extern char __etext; extern char __nodetext; namespace node { - namespace largepages { - #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) #define PAGE_ALIGN_UP(x,a) ALIGN(x,a) #define PAGE_ALIGN_DOWN(x,a) ((x) & ~((a) - 1)) - + struct TextRegion { + int found_text_region; void * from; void * to; - long offset; - int totalHugePages; - char name[PATH_MAX]; + int totalHugePages; + long offset; + char name[PATH_MAX]; }; - const size_t hugePageSize = 2L * 1024 * 1024; - const size_t fourkPageSize = 4L * 1024; - - - /* - Finding the text region. - 1. We read the maps file and find the start and end addresss of the loaded node process - 2. Within that start and end address is the .text region is what we are interested in. - 3. We modify the linker script to PROVIDE(__nodetext) which points to this region. - 4. _etext is the end of the .text segment. - 5. We return back a struct of the TextRegion - - The /proc//maps looks like this. The first entry is executable of the node process and it's address is from 00400000-020c7000 - 00400000-020c7000 r-xp 00000000 08:01 538609 /home/ssuresh/node/out/Release/node - 022c6000-022c7000 r--p 01cc6000 08:01 538609 /home/ssuresh/node/out/Release/node - 022c7000-022e1000 rw-p 01cc7000 08:01 538609 /home/ssuresh/node/out/Release/node - 022e1000-02412000 rw-p 00000000 00:00 0 [heap] - - If we look at the elf header (info file in gdb we see the .text.In the custom linker script we provide an entry to that. - - `/home/ssuresh/node/out/Release/node', file type elf64-x86-64. 
- Entry point: 0x847e60 - 0x0000000000400270 - 0x000000000040028c is .interp - 0x000000000040028c - 0x00000000004002ac is .note.ABI-tag - 0x00000000004002ac - 0x00000000004002d0 is .note.gnu.build-id - 0x00000000004002d0 - 0x0000000000453ce0 is .gnu.hash - 0x0000000000453ce0 - 0x000000000055be18 is .dynsym - 0x000000000055be18 - 0x0000000000829208 is .dynstr - 0x0000000000829208 - 0x000000000083f222 is .gnu.version - 0x000000000083f228 - 0x000000000083f438 is .gnu.version_r - 0x000000000083f438 - 0x000000000083f660 is .rela.dyn - 0x000000000083f660 - 0x0000000000841d30 is .rela.plt - 0x0000000000841d30 - 0x0000000000841d4f is .init - 0x0000000000841d50 - 0x0000000000843740 is .plt - 0x0000000000843740 - 0x0000000000843748 is .plt.got - 0x0000000000843800 - 0x0000000001680519 is .text - 0x000000000168051c - 0x0000000001680525 is .fini - 0x0000000001681000 - 0x0000000001ed0778 is .rodata - 0x0000000001ed0778 - 0x0000000001ed0780 is .eh_frame_hdr - 0x0000000002000000 - 0x00000000023a97c4 is .eh_frame - 0x00000000025a9d50 - 0x00000000025a9d54 is .tbss - 0x00000000025a9d50 - 0x00000000025a9da0 is .init_array - 0x00000000025a9da0 - 0x00000000025a9db8 is .fini_array - 0x00000000025a9db8 - 0x00000000025a9dc0 is .jcr - 0x00000000025a9dc0 - 0x00000000025a9ff0 is .dynamic - 0x00000000025a9ff0 - 0x00000000025aa000 is .got - 0x00000000025aa000 - 0x00000000025aad08 is .got.plt - 0x00000000025aad20 - 0x00000000025c39f0 is .data - 0x00000000025c3a00 - 0x00000000025dbb28 is .bss - - */ - + + static void printSystemError(int error) { + fprintf(stderr, "Hugepages WARNING: %s\n", strerror(error)); + return; + } + static struct TextRegion find_node_text_region() { FILE *f; long unsigned int start, end, offset, inode; @@ -124,159 +73,184 @@ namespace node { const size_t hugePageSize = 2L * 1024 * 1024; struct TextRegion nregion; + nregion.found_text_region = 0; f = fopen("/proc/self/maps", "r"); - - ret = fscanf(f, "%lx-%lx %4s %lx %5s %ld %s\n", &start, &end, perm, &offset, dev, 
&inode, name); - if (ret == 7 && perm[0] == 'r' && perm[1] == '-' && perm[2] == 'x' - && name[0] == '/' && strstr(name, "node") != NULL) { - // Checking if the region is from node binary and executable - // 00400000-020c7000 r-xp 00000000 08:01 538609 /home/ssuresh/node/out/Release/node - start = (unsigned int long) &__nodetext; -// end = (unsigned int long) &__etext; - char *from = (char *)PAGE_ALIGN_UP(start, hugePageSize); - char *to = (char *)PAGE_ALIGN_DOWN(end, hugePageSize); - size_t size = (intptr_t)to - (intptr_t)from; - nregion.from = from; - nregion.to = to; - nregion.offset = offset; - nregion.totalHugePages = size/hugePageSize; - strcpy(nregion.name,name); - if (from > to) { - // Handle Error - return nregion; - } - return nregion; - } - else { - // Handle Error - return nregion; + ret = fscanf(f, "%lx-%lx %4s %lx %5s %ld %s\n", + &start, &end, perm, &offset, dev, &inode, name); + fclose(f); + + if (ret == 7 && + perm[0] == 'r' && perm[1] == '-' && perm[2] == 'x') { + + // Checking if the region is from node binary and executable + start = (unsigned int long) &__nodetext; + char *from = (char *)PAGE_ALIGN_UP(start, hugePageSize); + char *to = (char *)PAGE_ALIGN_DOWN(end, hugePageSize); + + if (from < to) { + size_t size = (intptr_t)to - (intptr_t)from; + nregion.found_text_region = 1; + nregion.from = from; + nregion.to = to; + nregion.offset = offset; + nregion.totalHugePages = size/hugePageSize; + strcpy(nregion.name,name); + return nregion; + } } - - } - - /* - - Moving the text region to large pages. We need to be very careful. - a) This function itself should not be moved. - We use a gcc option to put it outside the .text area - b) This function should not call any functions that might be moved. - - 1. We map a new area and copy the original code there - 2. We mmap using HUGE_TLB - 3. If we are successful we copy the code there and unmap the original region. 
- */ - - void - __attribute__((__section__(".eh_frame"))) - __attribute__((__aligned__(2 * 1024 * 1024))) - __attribute__((__noinline__)) - __attribute__((__optimize__("2"))) - move_text_region_to_large_pages(struct TextRegion r) { - size_t size = (intptr_t)r.to - (intptr_t)r.from; - void *nmem; - void *start = r.from; - - nmem = mmap(NULL, size,PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,-1, 0); - memcpy(nmem, r.from, size); - -#if 0 // use for explicit huge pages - mmap(start, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB, -1 , 0); -#endif -#if 1 // use for transparent huge pages - mmap(start, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0); - madvise(start, size, MADV_HUGEPAGE); -#endif - memcpy(start, nmem, size); - mprotect(start, size, PROT_READ | PROT_EXEC); - munmap(nmem, size); - + return nregion; } - bool transHugePagesPresent=false; - bool explicitHugePagesPresent = false; static bool isTransparentHugePagesEnabled() { - /* - - You’ll see a list of all possible options ( always, madvise, never ), with - the currently active option being enclosed in brackets.madvise is the default. - This means transparent hugepages are only enabled for memory regions that - explicitly request hugepages using madvise(2). - - Applications that gets a lot of benefit from hugepages and that don't - risk to lose memory by using hugepages, should use - madvise(MADV_HUGEPAGE) on their critical mmapped regions. 
- */ - std::ifstream ifs; + ifs.open("/sys/kernel/mm/transparent_hugepage/enabled"); + if (!ifs) { + fprintf(stderr, "Hugepages WARNING: Couldn't check hugepages support\n"); + return false; + } + std::string always, madvise, never; - if (ifs.is_open()) { - while(ifs >> always >> madvise >> never) ; - } + if (ifs.is_open()) { + while(ifs >> always >> madvise >> never) ; + } + + int ret_status = false; + if (always.compare("[always]") == 0) - return true; - if (madvise.compare("[madvise]") == 0) - return true; + ret_status = true; + else if (madvise.compare("[madvise]") == 0) + ret_status = true; - return false; + ifs.close(); + return ret_status; } + static bool isExplicitHugePagesEnabled() { + int ret_status = false; std::string kw; std::ifstream file("/proc/meminfo"); while(file >> kw) { if(kw == "HugePages_Total:") { - unsigned long hp_tot; - file >> hp_tot; - if (hp_tot > 0) - return true; - else - return false; + unsigned long hp_tot; + file >> hp_tot; + if (hp_tot > 0) + ret_status = true; + else + ret_status = false; + + break; } } - return false; // HugePages not found + + file.close(); + return ret_status; } - static int howManyExplicitHugePagesFree() { - std::string kw; - std::ifstream file("/proc/meminfo"); - while(file >> kw) { - if(kw == "HugePages_Free:") { - unsigned long hp_free; - file >> hp_free; - return hp_free; + // Moving the text region to large pages. We need to be very careful. + // a) This function itself should not be moved. + // We use a gcc option to put it outside the ".text" section + // b) This function should not call any function(s) that might be moved. + // 1. We map a new area and copy the original code there + // 2. We mmap using HUGE_TLB + // 3. If successful we copy the code there and unmap the original region. 
+ void + __attribute__((__section__(".eh_frame"))) + __attribute__((__aligned__(2 * 1024 * 1024))) + __attribute__((__noinline__)) + __attribute__((__optimize__("2"))) + move_text_region_to_large_pages(struct TextRegion r) { + void *nmem = NULL, *tmem = NULL; + int ret=0; + + size_t size = (intptr_t)r.to - (intptr_t)r.from; + void *start = r.from; + + // Allocate temporary region preparing for copy + nmem = mmap(NULL, size, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (nmem == MAP_FAILED) { + printSystemError(errno); + return; } - } - return 0; // HugePages not found - + + memcpy(nmem, r.from, size); + +#if 0 // use for explicit huge pages + mmap(start, size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB, -1 , 0); +#endif + + // use for transparent huge pages if enabled + if (isTransparentHugePagesEnabled()) { + tmem = mmap(start, size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0); + if (tmem == MAP_FAILED) { + printSystemError(errno); + munmap(nmem, size); + return; + } + + ret = madvise(start, size, MADV_HUGEPAGE); + if (ret == -1) { + printSystemError(errno); + ret = munmap(tmem, size); + if (ret == -1) { + printSystemError(errno); + } + ret = munmap(nmem, size); + if (ret == -1) { + printSystemError(errno); + } + + return; + } + } + + memcpy(start, nmem, size); + ret = mprotect(start, size, PROT_READ | PROT_EXEC); + if (ret == -1) { + printSystemError(errno); + ret = munmap(tmem, size); + if (ret == -1) { + printSystemError(errno); + } + ret = munmap(nmem, size); + if (ret == -1) { + printSystemError(errno); + } + return; + } + + // Release the old/temporary mapped region + ret = munmap(nmem, size); + if (ret == -1) { + printSystemError(errno); + } + + return; } - static void testHugePageSupport() { - bool trans = isTransparentHugePagesEnabled(); - fprintf(stderr, "Transparent Huge Pages = %s\n", trans ? 
"enabled" : "disabled"); - bool explict = isExplicitHugePagesEnabled(); - fprintf(stderr, "Explicit Huge Pages = %s\n", explict ? "enabled" : "disabled"); - if (explict) { - int hpfree = howManyExplicitHugePagesFree(); - fprintf(stderr, "Number of Explicit Huge Pages = %d\n", hpfree); + // This is the primary API called from main + void map_static_code_to_large_pages() { + struct TextRegion n = find_node_text_region(); + if (n.found_text_region != 1) { + fprintf(stderr, "Hugepages WARNING: failed to map static code\n"); + return; } - + + if (n.to <= (void *) & move_text_region_to_large_pages) + move_text_region_to_large_pages(n); + + return; } - - /* This is the primary interface that is exposed */ bool isLargePagesEnabled() { - return isExplicitHugePagesEnabled() || isTransparentHugePagesEnabled(); } - - void map_static_code_to_large_pages() { - struct TextRegion n; - n = find_node_text_region(); - if (n.to <= (void *) & move_text_region_to_large_pages) - move_text_region_to_large_pages(n); - - } } } From 7ef956adaf3dd892fadce91f7f5a642c0f7492e3 Mon Sep 17 00:00:00 2001 From: Uttam Pawar Date: Tue, 8 May 2018 17:17:08 -0700 Subject: [PATCH 09/24] Changed return type for two function from void to int --- src/node.cc | 5 +++-- src/node_large_page.cc | 34 ++++++++++++++-------------------- src/node_large_page.h | 2 +- 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/src/node.cc b/src/node.cc index 90e54f05e1fc5c..8718139beb698e 100644 --- a/src/node.cc +++ b/src/node.cc @@ -4487,8 +4487,9 @@ int Start(int argc, char** argv) { #ifdef NODE_ENABLE_LARGE_CODE_PAGES if (node::largepages::isLargePagesEnabled()) { - // fprintf(stderr, "Mapping static code to large pages\n"); - node::largepages::map_static_code_to_large_pages(); + if ( (node::largepages::map_static_code_to_large_pages()) != 0) { + fprintf(stderr, "Warning: Mapping of static code to large pages failed.\n"); + } } #endif diff --git a/src/node_large_page.cc b/src/node_large_page.cc index 
da96df8773f13f..1b5e76b94d9030 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -52,11 +52,11 @@ namespace node { #define PAGE_ALIGN_DOWN(x,a) ((x) & ~((a) - 1)) struct TextRegion { - int found_text_region; void * from; void * to; int totalHugePages; long offset; + bool found_text_region; char name[PATH_MAX]; }; @@ -73,7 +73,7 @@ namespace node { const size_t hugePageSize = 2L * 1024 * 1024; struct TextRegion nregion; - nregion.found_text_region = 0; + nregion.found_text_region = false; f = fopen("/proc/self/maps", "r"); ret = fscanf(f, "%lx-%lx %4s %lx %5s %ld %s\n", @@ -90,7 +90,7 @@ namespace node { if (from < to) { size_t size = (intptr_t)to - (intptr_t)from; - nregion.found_text_region = 1; + nregion.found_text_region = true; nregion.from = from; nregion.to = to; nregion.offset = offset; @@ -156,7 +156,7 @@ namespace node { // 1. We map a new area and copy the original code there // 2. We mmap using HUGE_TLB // 3. If successful we copy the code there and unmap the original region. 
- void + int __attribute__((__section__(".eh_frame"))) __attribute__((__aligned__(2 * 1024 * 1024))) __attribute__((__noinline__)) @@ -173,17 +173,11 @@ namespace node { PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (nmem == MAP_FAILED) { printSystemError(errno); - return; + return -1; } memcpy(nmem, r.from, size); -#if 0 // use for explicit huge pages - mmap(start, size, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB, -1 , 0); -#endif - // use for transparent huge pages if enabled if (isTransparentHugePagesEnabled()) { tmem = mmap(start, size, @@ -192,7 +186,7 @@ namespace node { if (tmem == MAP_FAILED) { printSystemError(errno); munmap(nmem, size); - return; + return -1; } ret = madvise(start, size, MADV_HUGEPAGE); @@ -207,7 +201,7 @@ namespace node { printSystemError(errno); } - return; + return -1; } } @@ -223,7 +217,7 @@ namespace node { if (ret == -1) { printSystemError(errno); } - return; + return -1; } // Release the old/temporary mapped region @@ -232,21 +226,21 @@ namespace node { printSystemError(errno); } - return; + return ret; } // This is the primary API called from main - void map_static_code_to_large_pages() { + int map_static_code_to_large_pages() { struct TextRegion n = find_node_text_region(); - if (n.found_text_region != 1) { + if (n.found_text_region == false) { fprintf(stderr, "Hugepages WARNING: failed to map static code\n"); - return; + return -1; } if (n.to <= (void *) & move_text_region_to_large_pages) - move_text_region_to_large_pages(n); + return move_text_region_to_large_pages(n); - return; + return -1; } bool isLargePagesEnabled() { diff --git a/src/node_large_page.h b/src/node_large_page.h index d2cbafd51ca72b..70537047eb5d5f 100644 --- a/src/node_large_page.h +++ b/src/node_large_page.h @@ -28,7 +28,7 @@ SPDX-License-Identifier: MIT namespace node { namespace largepages { bool isLargePagesEnabled(); - void map_static_code_to_large_pages(); + int 
map_static_code_to_large_pages(); } // namespace largepages } // namespace node From b02e7a982f0b37b245797c8ffee5aa14eb37f2d5 Mon Sep 17 00:00:00 2001 From: Uttam Pawar Date: Thu, 24 May 2018 14:59:01 -0700 Subject: [PATCH 10/24] Fixed lint errors. --- src/node.cc | 8 +++--- src/node_large_page.cc | 60 ++++++++++++++++++++-------------------- src/node_large_page.h | 62 ++++++++++++++++++++---------------------- 3 files changed, 64 insertions(+), 66 deletions(-) diff --git a/src/node.cc b/src/node.cc index 8718139beb698e..3d3a92247bed4a 100644 --- a/src/node.cc +++ b/src/node.cc @@ -4424,7 +4424,7 @@ inline int Start(Isolate* isolate, IsolateData* isolate_data, return exit_code; } - + inline int Start(uv_loop_t* event_loop, int argc, const char* const* argv, int exec_argc, const char* const* exec_argv) { @@ -4435,7 +4435,7 @@ inline int Start(uv_loop_t* event_loop, params.code_event_handler = vTune::GetVtuneCodeEventHandler(); #endif - + Isolate* const isolate = Isolate::New(params); if (isolate == nullptr) return 12; // Signal internal error. 
@@ -4487,8 +4487,8 @@ int Start(int argc, char** argv) { #ifdef NODE_ENABLE_LARGE_CODE_PAGES if (node::largepages::isLargePagesEnabled()) { - if ( (node::largepages::map_static_code_to_large_pages()) != 0) { - fprintf(stderr, "Warning: Mapping of static code to large pages failed.\n"); + if ((node::largepages::map_static_code_to_large_pages()) != 0) { + fprintf(stderr, "Mapping of static code to large pages failed.\n"); } } #endif diff --git a/src/node_large_page.cc b/src/node_large_page.cc index 1b5e76b94d9030..820cfed651d51f 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -46,19 +46,18 @@ extern char __etext; extern char __nodetext; namespace node { - namespace largepages { -#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) -#define PAGE_ALIGN_UP(x,a) ALIGN(x,a) -#define PAGE_ALIGN_DOWN(x,a) ((x) & ~((a) - 1)) - - struct TextRegion { - void * from; - void * to; - int totalHugePages; - long offset; - bool found_text_region; - char name[PATH_MAX]; - }; +namespace largepages { +#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) +#define PAGE_ALIGN_UP(x, a) ALIGN(x, a) +#define PAGE_ALIGN_DOWN(x, a) ((x) & ~((a) - 1)) + +struct TextRegion { + void * from; + void * to; + int totalHugePages; + int64 offset; + bool found_text_region; +}; static void printSystemError(int error) { fprintf(stderr, "Hugepages WARNING: %s\n", strerror(error)); @@ -67,7 +66,7 @@ namespace node { static struct TextRegion find_node_text_region() { FILE *f; - long unsigned int start, end, offset, inode; + unsigned int64 start, end, offset, inode; char perm[5], dev[6], name[256]; int ret; const size_t hugePageSize = 2L * 1024 * 1024; @@ -82,11 +81,10 @@ namespace node { if (ret == 7 && perm[0] == 'r' && perm[1] == '-' && perm[2] == 'x') { - // Checking if the region is from node binary and executable - start = (unsigned int long) &__nodetext; - char *from = (char *)PAGE_ALIGN_UP(start, hugePageSize); - char *to = (char *)PAGE_ALIGN_DOWN(end, hugePageSize); + start = (unsigned 
int64) &__nodetext; + char *from = reinterpret_castPAGE_ALIGN_UP(start, hugePageSize); + char *to = reinterpret_castPAGE_ALIGN_DOWN(end, hugePageSize); if (from < to) { size_t size = (intptr_t)to - (intptr_t)from; @@ -95,7 +93,6 @@ namespace node { nregion.to = to; nregion.offset = offset; nregion.totalHugePages = size/hugePageSize; - strcpy(nregion.name,name); return nregion; } } @@ -108,13 +105,13 @@ namespace node { ifs.open("/sys/kernel/mm/transparent_hugepage/enabled"); if (!ifs) { - fprintf(stderr, "Hugepages WARNING: Couldn't check hugepages support\n"); + fprintf(stderr, "WARNING: Couldn't check hugepages support\n"); return false; } std::string always, madvise, never; if (ifs.is_open()) { - while(ifs >> always >> madvise >> never) ; + while (ifs >> always >> madvise >> never) {} } int ret_status = false; @@ -123,6 +120,8 @@ namespace node { ret_status = true; else if (madvise.compare("[madvise]") == 0) ret_status = true; + else if (never.compare("[never]") == 0) + ret_status = false; ifs.close(); return ret_status; @@ -132,9 +131,9 @@ namespace node { int ret_status = false; std::string kw; std::ifstream file("/proc/meminfo"); - while(file >> kw) { - if(kw == "HugePages_Total:") { - unsigned long hp_tot; + while (file >> kw) { + if (kw == "HugePages_Total:") { + unsigned int64 hp_tot; file >> hp_tot; if (hp_tot > 0) ret_status = true; @@ -162,14 +161,14 @@ namespace node { __attribute__((__noinline__)) __attribute__((__optimize__("2"))) move_text_region_to_large_pages(struct TextRegion r) { - void *nmem = NULL, *tmem = NULL; - int ret=0; + void *nmem = nullptr, *tmem = nullptr; + int ret = 0; size_t size = (intptr_t)r.to - (intptr_t)r.from; void *start = r.from; // Allocate temporary region preparing for copy - nmem = mmap(NULL, size, + nmem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (nmem == MAP_FAILED) { printSystemError(errno); @@ -237,7 +236,7 @@ namespace node { return -1; } - if (n.to <= (void *) & 
move_text_region_to_large_pages) + if (n.to <= reinterpret_cast & move_text_region_to_large_pages) return move_text_region_to_large_pages(n); return -1; @@ -246,5 +245,6 @@ namespace node { bool isLargePagesEnabled() { return isExplicitHugePagesEnabled() || isTransparentHugePagesEnabled(); } - } -} + +} // namespace largepages +} // namespace node diff --git a/src/node_large_page.h b/src/node_large_page.h index 70537047eb5d5f..1f53f893c601f8 100644 --- a/src/node_large_page.h +++ b/src/node_large_page.h @@ -1,36 +1,34 @@ -/* -Copyright (C) 2018 Intel Corporation - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, -and/or sell copies of the Software, and to permit persons to whom -the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES -OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -OR OTHER DEALINGS IN THE SOFTWARE. 
- -SPDX-License-Identifier: MIT -*/ - -#ifndef NODE_LARGEPAGE_H_ -#define NODE_LARGEPAGE_H_ +// Copyright (C) 2018 Intel Corporation +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom +// the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+// +// SPDX-License-Identifier: MIT + +#ifndef SRC_NODE_LARGE_PAGE_H_ +#define SRC_NODE_LARGE_PAGE_H_ namespace node { - namespace largepages { - bool isLargePagesEnabled(); - int map_static_code_to_large_pages(); - } // namespace largepages -} // namespace node +namespace largepages { + bool isLargePagesEnabled(); + int map_static_code_to_large_pages(); +} // namespace largepages +} // namespace node -#endif // NODE_LARGE_PAGE_H_ +#endif // SRC_NODE_LARGE_PAGE_H_ From 2af82b1e465acd5e31794012be1e11665dd5aff3 Mon Sep 17 00:00:00 2001 From: Uttam Pawar Date: Fri, 25 May 2018 17:09:21 -0700 Subject: [PATCH 11/24] Added one more condition check for verify the start address of newly remapped region --- src/node.cc | 1 + src/node_large_page.cc | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/node.cc b/src/node.cc index 3d3a92247bed4a..3cf9c3c8526b83 100644 --- a/src/node.cc +++ b/src/node.cc @@ -4489,6 +4489,7 @@ int Start(int argc, char** argv) { if (node::largepages::isLargePagesEnabled()) { if ((node::largepages::map_static_code_to_large_pages()) != 0) { fprintf(stderr, "Mapping of static code to large pages failed.\n"); + fprintf(stderr, "Reverting to default page size\n"); } } #endif diff --git a/src/node_large_page.cc b/src/node_large_page.cc index 820cfed651d51f..5b2bb485b36463 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -188,7 +188,14 @@ struct TextRegion { return -1; } - ret = madvise(start, size, MADV_HUGEPAGE); + if (tmem != start) { + fprintf(stderr, "Unable to allocate hugepages.n"); + munmap(nmem, size); + munmap(tmem, size); + return -1; + } + + ret = madvise(tmem, size, MADV_HUGEPAGE); if (ret == -1) { printSystemError(errno); ret = munmap(tmem, size); From cae928581a1de05969f2fbaa6850c3395eaa41e7 Mon Sep 17 00:00:00 2001 From: Uttam Pawar Date: Wed, 30 May 2018 15:25:50 -0700 Subject: [PATCH 12/24] Fixed syntax error due to wring data type. 
int64 to int64_t --- src/node_large_page.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/node_large_page.cc b/src/node_large_page.cc index 5b2bb485b36463..7dda57b3da5844 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -55,7 +55,7 @@ struct TextRegion { void * from; void * to; int totalHugePages; - int64 offset; + int64_t offset; bool found_text_region; }; @@ -66,7 +66,7 @@ struct TextRegion { static struct TextRegion find_node_text_region() { FILE *f; - unsigned int64 start, end, offset, inode; + int64_t start, end, offset, inode; char perm[5], dev[6], name[256]; int ret; const size_t hugePageSize = 2L * 1024 * 1024; @@ -82,7 +82,7 @@ struct TextRegion { if (ret == 7 && perm[0] == 'r' && perm[1] == '-' && perm[2] == 'x') { // Checking if the region is from node binary and executable - start = (unsigned int64) &__nodetext; + start = (unsigned int64_t) &__nodetext; char *from = reinterpret_castPAGE_ALIGN_UP(start, hugePageSize); char *to = reinterpret_castPAGE_ALIGN_DOWN(end, hugePageSize); @@ -133,7 +133,7 @@ struct TextRegion { std::ifstream file("/proc/meminfo"); while (file >> kw) { if (kw == "HugePages_Total:") { - unsigned int64 hp_tot; + int64_t hp_tot; file >> hp_tot; if (hp_tot > 0) ret_status = true; @@ -243,7 +243,7 @@ struct TextRegion { return -1; } - if (n.to <= reinterpret_cast & move_text_region_to_large_pages) + if (n.to <= reinterpret_cast (&move_text_region_to_large_pages)) return move_text_region_to_large_pages(n); return -1; From 30114b63628570b242f7e38ef75d432855b0db5e Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Wed, 30 May 2018 15:54:12 -0700 Subject: [PATCH 13/24] Removed explictHugePages code --- src/node_large_page.cc | 74 ++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 49 deletions(-) diff --git a/src/node_large_page.cc b/src/node_large_page.cc index 7dda57b3da5844..172a388837ffe2 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ 
-127,27 +127,6 @@ struct TextRegion { return ret_status; } - static bool isExplicitHugePagesEnabled() { - int ret_status = false; - std::string kw; - std::ifstream file("/proc/meminfo"); - while (file >> kw) { - if (kw == "HugePages_Total:") { - int64_t hp_tot; - file >> hp_tot; - if (hp_tot > 0) - ret_status = true; - else - ret_status = false; - - break; - } - } - - file.close(); - return ret_status; - } - // Moving the text region to large pages. We need to be very careful. // a) This function itself should not be moved. // We use a gcc option to put it outside the ".text" section @@ -177,38 +156,35 @@ struct TextRegion { memcpy(nmem, r.from, size); - // use for transparent huge pages if enabled - if (isTransparentHugePagesEnabled()) { - tmem = mmap(start, size, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0); - if (tmem == MAP_FAILED) { - printSystemError(errno); - munmap(nmem, size); - return -1; - } + tmem = mmap(start, size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0); + if (tmem == MAP_FAILED) { + printSystemError(errno); + munmap(nmem, size); + return -1; + } - if (tmem != start) { - fprintf(stderr, "Unable to allocate hugepages.n"); - munmap(nmem, size); - munmap(tmem, size); - return -1; - } + if (tmem != start) { + fprintf(stderr, "Unable to allocate hugepages.n"); + munmap(nmem, size); + munmap(tmem, size); + return -1; + } - ret = madvise(tmem, size, MADV_HUGEPAGE); + ret = madvise(tmem, size, MADV_HUGEPAGE); + if (ret == -1) { + printSystemError(errno); + ret = munmap(tmem, size); + if (ret == -1) { + printSystemError(errno); + } + ret = munmap(nmem, size); if (ret == -1) { printSystemError(errno); - ret = munmap(tmem, size); - if (ret == -1) { - printSystemError(errno); - } - ret = munmap(nmem, size); - if (ret == -1) { - printSystemError(errno); - } - - return -1; } + + return -1; } memcpy(start, nmem, size); @@ -250,7 +226,7 @@ struct TextRegion { } bool 
isLargePagesEnabled() { - return isExplicitHugePagesEnabled() || isTransparentHugePagesEnabled(); + return isTransparentHugePagesEnabled(); } } // namespace largepages From 2f672ee8fe4f9b21331ccaaa364ca9095d08db1b Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Fri, 1 Jun 2018 16:04:18 -0700 Subject: [PATCH 14/24] Update PR based on feedback --- src/node.cc | 6 +-- src/node_large_page.cc | 102 +++++++++++++++++++++++++---------------- src/node_large_page.h | 2 - 3 files changed, 64 insertions(+), 46 deletions(-) diff --git a/src/node.cc b/src/node.cc index 9ad74719b67fe8..4491066deb7a2a 100644 --- a/src/node.cc +++ b/src/node.cc @@ -4379,9 +4379,8 @@ int Start(int argc, char** argv) { CHECK_GT(argc, 0); #ifdef NODE_ENABLE_LARGE_CODE_PAGES - if (node::largepages::isLargePagesEnabled()) { - if ((node::largepages::map_static_code_to_large_pages()) != 0) { - fprintf(stderr, "Mapping of static code to large pages failed.\n"); + if (node::isLargePagesEnabled()) { + if ((node::map_static_code_to_large_pages()) != 0) { fprintf(stderr, "Reverting to default page size\n"); } } @@ -4431,7 +4430,6 @@ int Start(int argc, char** argv) { // will never be fully cleaned up. v8_platform.Dispose(); - delete[] exec_argv; exec_argv = nullptr; diff --git a/src/node_large_page.cc b/src/node_large_page.cc index 172a388837ffe2..b13018a3840bb3 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -35,28 +35,38 @@ #include #include #include +#include // The functions in this file map the text segment of node into 2M pages. -// The algorithm is quite simple +// The algorithm is simple // 1. Find the text region of node binary in memory +// Examine the /proc/self/maps to determine the currently mapped text +// region and obtain the start and end +// Modify the start to point to the very beginning of node text segment +// (from variable nodetext setup in ld.script) +// Align the address of start and end to Large Page Boundaries +// // 2. 
Move the text region to large pages +// Map a new area and copy the original code there +// Use mmap using the start address with MAP_FIXED so we get exactly the +// same virtual address +// Use madvise with MADV_HUGE_PAGE to use Anonymous 2M Pages +// If successful copy the code there and unmap the original region. extern char __executable_start; extern char __etext; extern char __nodetext; namespace node { -namespace largepages { #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) #define PAGE_ALIGN_UP(x, a) ALIGN(x, a) #define PAGE_ALIGN_DOWN(x, a) ((x) & ~((a) - 1)) struct TextRegion { - void * from; - void * to; - int totalHugePages; - int64_t offset; - bool found_text_region; + void * from; + void * to; + int total_hugepages; + bool found_text_region; }; static void printSystemError(int error) { @@ -64,36 +74,42 @@ struct TextRegion { return; } +// The format of the maps file is the following +// address perms offset dev inode pathname +// 00400000-00452000 r-xp 00000000 08:02 173521 /usr/bin/dbus-daemon + static struct TextRegion find_node_text_region() { - FILE *f; - int64_t start, end, offset, inode; - char perm[5], dev[6], name[256]; - int ret; - const size_t hugePageSize = 2L * 1024 * 1024; + std::ifstream ifs; + std::string map_line; + std::string permission; + char dash; + int64_t start, end; + const size_t hps = 2L * 1024 * 1024; struct TextRegion nregion; nregion.found_text_region = false; - f = fopen("/proc/self/maps", "r"); - ret = fscanf(f, "%lx-%lx %4s %lx %5s %ld %s\n", - &start, &end, perm, &offset, dev, &inode, name); - fclose(f); + ifs.open("/proc/self/maps"); + std::getline(ifs, map_line); + std::istringstream iss(map_line); + ifs.close(); + + iss >> std::hex >> start; + iss >> dash; + iss >> std::hex >> end; + iss >> permission; - if (ret == 7 && - perm[0] == 'r' && perm[1] == '-' && perm[2] == 'x') { - // Checking if the region is from node binary and executable + if (permission.compare("r-xp") == 0) { start = (unsigned int64_t) 
&__nodetext; - char *from = reinterpret_castPAGE_ALIGN_UP(start, hugePageSize); - char *to = reinterpret_castPAGE_ALIGN_DOWN(end, hugePageSize); + char *from = reinterpret_castPAGE_ALIGN_UP(start, hps); + char *to = reinterpret_castPAGE_ALIGN_DOWN(end, hps); if (from < to) { - size_t size = (intptr_t)to - (intptr_t)from; + size_t size = to - from; nregion.found_text_region = true; nregion.from = from; nregion.to = to; - nregion.offset = offset; - nregion.totalHugePages = size/hugePageSize; - return nregion; + nregion.total_hugepages = size / hps; } } @@ -105,7 +121,8 @@ struct TextRegion { ifs.open("/sys/kernel/mm/transparent_hugepage/enabled"); if (!ifs) { - fprintf(stderr, "WARNING: Couldn't check hugepages support\n"); + fprintf(stderr, "Could not open file: " \ + "/sys/kernel/mm/transparent_hugepage/enabled\n"); return false; } @@ -127,13 +144,15 @@ struct TextRegion { return ret_status; } - // Moving the text region to large pages. We need to be very careful. - // a) This function itself should not be moved. - // We use a gcc option to put it outside the ".text" section - // b) This function should not call any function(s) that might be moved. - // 1. We map a new area and copy the original code there - // 2. We mmap using HUGE_TLB - // 3. If successful we copy the code there and unmap the original region. +// Moving the text region to large pages. We need to be very careful. +// a) This function itself should not be moved. +// We use a gcc option to put it outside the ".text" section +// b) This function should not call any function(s) that might be moved. +// 1. map a new area and copy the original code there +// 2. mmap using the start address with MAP_FIXED so we get exactly +// the same virtual address +// 3. madvise with MADV_HUGE_PAGE +// 3. 
If successful copy the code there and unmap the original region int __attribute__((__section__(".eh_frame"))) __attribute__((__aligned__(2 * 1024 * 1024))) @@ -156,6 +175,10 @@ struct TextRegion { memcpy(nmem, r.from, size); +// We already know the original page is r-xp +// (PROT_READ, PROT_EXEC, MAP_PRIVATE) +// We want PROT_WRITE because we are writing into it. +// We want it at the fixed address and we use MAP_FIXED. tmem = mmap(start, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0); @@ -166,7 +189,7 @@ struct TextRegion { } if (tmem != start) { - fprintf(stderr, "Unable to allocate hugepages.n"); + fprintf(stderr, "Unable to allocate hugepages\n"); munmap(nmem, size); munmap(tmem, size); return -1; @@ -213,14 +236,14 @@ struct TextRegion { // This is the primary API called from main int map_static_code_to_large_pages() { - struct TextRegion n = find_node_text_region(); - if (n.found_text_region == false) { - fprintf(stderr, "Hugepages WARNING: failed to map static code\n"); + struct TextRegion r = find_node_text_region(); + if (r.found_text_region == false) { + fprintf(stderr, "Hugepages WARNING: failed to find text region \n"); return -1; } - if (n.to <= reinterpret_cast (&move_text_region_to_large_pages)) - return move_text_region_to_large_pages(n); + if (r.to <= reinterpret_cast (&move_text_region_to_large_pages)) + return move_text_region_to_large_pages(r); return -1; } @@ -229,5 +252,4 @@ struct TextRegion { return isTransparentHugePagesEnabled(); } -} // namespace largepages } // namespace node diff --git a/src/node_large_page.h b/src/node_large_page.h index 1f53f893c601f8..ccc4a800c7d96b 100644 --- a/src/node_large_page.h +++ b/src/node_large_page.h @@ -24,10 +24,8 @@ #define SRC_NODE_LARGE_PAGE_H_ namespace node { -namespace largepages { bool isLargePagesEnabled(); int map_static_code_to_large_pages(); -} // namespace largepages } // namespace node #endif // SRC_NODE_LARGE_PAGE_H_ From 
31504ccc36ed34345dcef528a1ffd13d8713fbb0 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Sun, 3 Jun 2018 08:56:27 -0700 Subject: [PATCH 15/24] Addressing the additional PR feedback --- configure | 7 ++----- src/node_large_page.cc | 12 ++++++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/configure b/configure index a785e8315e62a8..ad5b525299f324 100755 --- a/configure +++ b/configure @@ -941,11 +941,8 @@ def configure_node(o): else: o['variables']['node_use_dtrace'] = 'false' - if flavor == 'linux': - if options.node_use_large_pages: - o['variables']['node_use_large_pages'] = 'true' - else: - o['variables']['node_use_large_pages'] = 'false' + use_large_pages = (flavor == 'linux' and options.node_use_large_pages) + o['variables']['node_use_large_pages'] = b(use_large_pages) if options.no_ifaddrs: o['defines'] += ['SUNOS_NO_IFADDRS'] diff --git a/src/node_large_page.cc b/src/node_large_page.cc index b13018a3840bb3..d637f7454068c9 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -63,8 +63,8 @@ namespace node { #define PAGE_ALIGN_DOWN(x, a) ((x) & ~((a) - 1)) struct TextRegion { - void * from; - void * to; + char * from; + char * to; int total_hugepages; bool found_text_region; }; @@ -90,6 +90,10 @@ struct TextRegion { nregion.found_text_region = false; ifs.open("/proc/self/maps"); + if (!ifs) { + fprintf(stderr, "Could not open /proc/self/maps\n"); + return nregion; + } std::getline(ifs, map_line); std::istringstream iss(map_line); ifs.close(); @@ -100,7 +104,7 @@ struct TextRegion { iss >> permission; if (permission.compare("r-xp") == 0) { - start = (unsigned int64_t) &__nodetext; + start = reinterpret_cast(&__nodetext); char *from = reinterpret_castPAGE_ALIGN_UP(start, hps); char *to = reinterpret_castPAGE_ALIGN_DOWN(end, hps); @@ -162,7 +166,7 @@ struct TextRegion { void *nmem = nullptr, *tmem = nullptr; int ret = 0; - size_t size = (intptr_t)r.to - (intptr_t)r.from; + size_t size = r.to - r.from; void *start = r.from; 
// Allocate temporary region preparing for copy From f998c587fb9f2ea1a5f0585c7557d862ec5e2fde Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Sun, 3 Jun 2018 09:28:07 -0700 Subject: [PATCH 16/24] Fix the gypi style issue --- node.gypi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/node.gypi b/node.gypi index 83b8c6b47e4836..5568d7d23dfefa 100644 --- a/node.gypi +++ b/node.gypi @@ -356,5 +356,6 @@ }, { 'defines': [ 'HAVE_OPENSSL=0' ] }], - ] + + ], } From 9f15cfcee85890d8ef5e3f1aeeedd2eaca4f714e Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Mon, 4 Jun 2018 10:41:07 -0700 Subject: [PATCH 17/24] Update with stylistic changes (eg char* instead of char *, IsLargePageEnabled instead of isLargePageEnabled) --- src/node.cc | 2 +- src/node_large_page.cc | 26 ++++++++++---------------- src/node_large_page.h | 2 +- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/node.cc b/src/node.cc index 4491066deb7a2a..86835c9d8baa42 100644 --- a/src/node.cc +++ b/src/node.cc @@ -4379,7 +4379,7 @@ int Start(int argc, char** argv) { CHECK_GT(argc, 0); #ifdef NODE_ENABLE_LARGE_CODE_PAGES - if (node::isLargePagesEnabled()) { + if (node::IsLargePagesEnabled()) { if ((node::map_static_code_to_large_pages()) != 0) { fprintf(stderr, "Reverting to default page size\n"); } diff --git a/src/node_large_page.cc b/src/node_large_page.cc index d637f7454068c9..bf91d71f935b7b 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -63,8 +63,8 @@ namespace node { #define PAGE_ALIGN_DOWN(x, a) ((x) & ~((a) - 1)) struct TextRegion { - char * from; - char * to; + char* from; + char* to; int total_hugepages; bool found_text_region; }; @@ -105,8 +105,8 @@ struct TextRegion { if (permission.compare("r-xp") == 0) { start = reinterpret_cast(&__nodetext); - char *from = reinterpret_castPAGE_ALIGN_UP(start, hps); - char *to = reinterpret_castPAGE_ALIGN_DOWN(end, hps); + char* from = reinterpret_castPAGE_ALIGN_UP(start, hps); + char* to = 
reinterpret_castPAGE_ALIGN_DOWN(end, hps); if (from < to) { size_t size = to - from; @@ -120,7 +120,7 @@ struct TextRegion { return nregion; } - static bool isTransparentHugePagesEnabled() { + static bool IsTransparentHugePagesEnabled() { std::ifstream ifs; ifs.open("/sys/kernel/mm/transparent_hugepage/enabled"); @@ -163,11 +163,12 @@ struct TextRegion { __attribute__((__noinline__)) __attribute__((__optimize__("2"))) move_text_region_to_large_pages(struct TextRegion r) { - void *nmem = nullptr, *tmem = nullptr; + void* nmem = nullptr; + void* tmem = nullptr; int ret = 0; size_t size = r.to - r.from; - void *start = r.from; + void* start = r.from; // Allocate temporary region preparing for copy nmem = mmap(nullptr, size, @@ -192,13 +193,6 @@ struct TextRegion { return -1; } - if (tmem != start) { - fprintf(stderr, "Unable to allocate hugepages\n"); - munmap(nmem, size); - munmap(tmem, size); - return -1; - } - ret = madvise(tmem, size, MADV_HUGEPAGE); if (ret == -1) { printSystemError(errno); @@ -252,8 +246,8 @@ struct TextRegion { return -1; } - bool isLargePagesEnabled() { - return isTransparentHugePagesEnabled(); + bool IsLargePagesEnabled() { + return IsTransparentHugePagesEnabled(); } } // namespace node diff --git a/src/node_large_page.h b/src/node_large_page.h index ccc4a800c7d96b..711807453b90d0 100644 --- a/src/node_large_page.h +++ b/src/node_large_page.h @@ -24,7 +24,7 @@ #define SRC_NODE_LARGE_PAGE_H_ namespace node { - bool isLargePagesEnabled(); + bool IsLargePagesEnabled(); int map_static_code_to_large_pages(); } // namespace node From d6de361ecb1bd412f11f73781ef8b58cfd3f047e Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Mon, 4 Jun 2018 13:09:46 -0700 Subject: [PATCH 18/24] Add additional guard so large pages is only under Linux and target_arch=x64 --- node.gypi | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/node.gypi b/node.gypi index 5568d7d23dfefa..106d2f95f95f11 100644 --- a/node.gypi +++ b/node.gypi @@ -188,7 
+188,9 @@ }, }, 'conditions': [ - ['OS!="aix" and node_shared=="false" and node_use_large_pages=="true"', { + ['OS=="linux" and node_shared=="false" \ + and target_arch=="x64" \ + and node_use_large_pages=="true"', { 'ldflags': [ '-Wl,-T <(PRODUCT_DIR)/../../ld.script', '-Wl,--whole-archive,<(obj_dir)/deps/uv/<(STATIC_LIB_PREFIX)' @@ -196,7 +198,8 @@ '-Wl,--no-whole-archive', ] }], - ['OS!="aix" and node_shared=="false" and node_use_large_pages=="false"', { + ['OS!="aix" and node_shared=="false" \ + and node_use_large_pages=="false"', { 'ldflags': [ '-Wl,--whole-archive,<(obj_dir)/deps/uv/<(STATIC_LIB_PREFIX)' 'uv<(STATIC_LIB_SUFFIX)', From 29c7d13aadd7984454281e1a4f4a6c771a9aa18a Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Mon, 4 Jun 2018 15:06:02 -0700 Subject: [PATCH 19/24] Style fixes according to the Node C++ Style Guide i) CamelCase for methods, functions ii) snake_case for variables/structs iii) indentation (dont indent code inside namespace) --- src/node.cc | 2 +- src/node_large_page.cc | 302 ++++++++++++++++++++--------------------- src/node_large_page.h | 2 +- 3 files changed, 153 insertions(+), 153 deletions(-) diff --git a/src/node.cc b/src/node.cc index 86835c9d8baa42..75e9ccc7924e6a 100644 --- a/src/node.cc +++ b/src/node.cc @@ -4380,7 +4380,7 @@ int Start(int argc, char** argv) { #ifdef NODE_ENABLE_LARGE_CODE_PAGES if (node::IsLargePagesEnabled()) { - if ((node::map_static_code_to_large_pages()) != 0) { + if ((node::MapStaticCodeToLargePages()) != 0) { fprintf(stderr, "Reverting to default page size\n"); } } diff --git a/src/node_large_page.cc b/src/node_large_page.cc index bf91d71f935b7b..cd81feb7dc6a18 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -62,91 +62,91 @@ namespace node { #define PAGE_ALIGN_UP(x, a) ALIGN(x, a) #define PAGE_ALIGN_DOWN(x, a) ((x) & ~((a) - 1)) -struct TextRegion { +struct text_region { char* from; char* to; - int total_hugepages; - bool found_text_region; + int total_hugepages; + bool 
found_text_region; }; - static void printSystemError(int error) { - fprintf(stderr, "Hugepages WARNING: %s\n", strerror(error)); - return; - } +static void PrintSystemError(int error) { + fprintf(stderr, "Hugepages WARNING: %s\n", strerror(error)); + return; +} // The format of the maps file is the following // address perms offset dev inode pathname // 00400000-00452000 r-xp 00000000 08:02 173521 /usr/bin/dbus-daemon - static struct TextRegion find_node_text_region() { - std::ifstream ifs; - std::string map_line; - std::string permission; - char dash; - int64_t start, end; - const size_t hps = 2L * 1024 * 1024; - struct TextRegion nregion; - - nregion.found_text_region = false; - - ifs.open("/proc/self/maps"); - if (!ifs) { - fprintf(stderr, "Could not open /proc/self/maps\n"); - return nregion; - } - std::getline(ifs, map_line); - std::istringstream iss(map_line); - ifs.close(); - - iss >> std::hex >> start; - iss >> dash; - iss >> std::hex >> end; - iss >> permission; - - if (permission.compare("r-xp") == 0) { - start = reinterpret_cast(&__nodetext); - char* from = reinterpret_castPAGE_ALIGN_UP(start, hps); - char* to = reinterpret_castPAGE_ALIGN_DOWN(end, hps); - - if (from < to) { - size_t size = to - from; - nregion.found_text_region = true; - nregion.from = from; - nregion.to = to; - nregion.total_hugepages = size / hps; - } - } - - return nregion; +static struct text_region FindNodeTextRegion() { + std::ifstream ifs; + std::string map_line; + std::string permission; + char dash; + int64_t start, end; + const size_t hps = 2L * 1024 * 1024; + struct text_region nregion; + + nregion.found_text_region = false; + + ifs.open("/proc/self/maps"); + if (!ifs) { + fprintf(stderr, "Could not open /proc/self/maps\n"); + return nregion; + } + std::getline(ifs, map_line); + std::istringstream iss(map_line); + ifs.close(); + + iss >> std::hex >> start; + iss >> dash; + iss >> std::hex >> end; + iss >> permission; + + if (permission.compare("r-xp") == 0) { + start = 
reinterpret_cast(&__nodetext); + char* from = reinterpret_castPAGE_ALIGN_UP(start, hps); + char* to = reinterpret_castPAGE_ALIGN_DOWN(end, hps); + + if (from < to) { + size_t size = to - from; + nregion.found_text_region = true; + nregion.from = from; + nregion.to = to; + nregion.total_hugepages = size / hps; } + } - static bool IsTransparentHugePagesEnabled() { - std::ifstream ifs; + return nregion; +} - ifs.open("/sys/kernel/mm/transparent_hugepage/enabled"); - if (!ifs) { - fprintf(stderr, "Could not open file: " \ - "/sys/kernel/mm/transparent_hugepage/enabled\n"); - return false; - } +static bool IsTransparentHugePagesEnabled() { + std::ifstream ifs; - std::string always, madvise, never; - if (ifs.is_open()) { - while (ifs >> always >> madvise >> never) {} - } + ifs.open("/sys/kernel/mm/transparent_hugepage/enabled"); + if (!ifs) { + fprintf(stderr, "Could not open file: " \ + "/sys/kernel/mm/transparent_hugepage/enabled\n"); + return false; + } - int ret_status = false; + std::string always, madvise, never; + if (ifs.is_open()) { + while (ifs >> always >> madvise >> never) {} + } - if (always.compare("[always]") == 0) - ret_status = true; - else if (madvise.compare("[madvise]") == 0) - ret_status = true; - else if (never.compare("[never]") == 0) - ret_status = false; + int ret_status = false; - ifs.close(); - return ret_status; - } + if (always.compare("[always]") == 0) + ret_status = true; + else if (madvise.compare("[madvise]") == 0) + ret_status = true; + else if (never.compare("[never]") == 0) + ret_status = false; + + ifs.close(); + return ret_status; +} // Moving the text region to large pages. We need to be very careful. // a) This function itself should not be moved. @@ -157,97 +157,97 @@ struct TextRegion { // the same virtual address // 3. madvise with MADV_HUGE_PAGE // 3. 
If successful copy the code there and unmap the original region - int - __attribute__((__section__(".eh_frame"))) - __attribute__((__aligned__(2 * 1024 * 1024))) - __attribute__((__noinline__)) - __attribute__((__optimize__("2"))) - move_text_region_to_large_pages(struct TextRegion r) { - void* nmem = nullptr; - void* tmem = nullptr; - int ret = 0; - - size_t size = r.to - r.from; - void* start = r.from; - - // Allocate temporary region preparing for copy - nmem = mmap(nullptr, size, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (nmem == MAP_FAILED) { - printSystemError(errno); - return -1; - } - - memcpy(nmem, r.from, size); +int +__attribute__((__section__(".eh_frame"))) +__attribute__((__aligned__(2 * 1024 * 1024))) +__attribute__((__noinline__)) +__attribute__((__optimize__("2"))) +MoveTextRegionToLargePages(struct text_region r) { + void* nmem = nullptr; + void* tmem = nullptr; + int ret = 0; + + size_t size = r.to - r.from; + void* start = r.from; + + // Allocate temporary region preparing for copy + nmem = mmap(nullptr, size, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (nmem == MAP_FAILED) { + PrintSystemError(errno); + return -1; + } + + memcpy(nmem, r.from, size); // We already know the original page is r-xp // (PROT_READ, PROT_EXEC, MAP_PRIVATE) // We want PROT_WRITE because we are writing into it. // We want it at the fixed address and we use MAP_FIXED. 
- tmem = mmap(start, size, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0); - if (tmem == MAP_FAILED) { - printSystemError(errno); - munmap(nmem, size); - return -1; - } - - ret = madvise(tmem, size, MADV_HUGEPAGE); - if (ret == -1) { - printSystemError(errno); - ret = munmap(tmem, size); - if (ret == -1) { - printSystemError(errno); - } - ret = munmap(nmem, size); - if (ret == -1) { - printSystemError(errno); - } - - return -1; - } - - memcpy(start, nmem, size); - ret = mprotect(start, size, PROT_READ | PROT_EXEC); - if (ret == -1) { - printSystemError(errno); - ret = munmap(tmem, size); - if (ret == -1) { - printSystemError(errno); - } - ret = munmap(nmem, size); - if (ret == -1) { - printSystemError(errno); - } - return -1; - } - - // Release the old/temporary mapped region - ret = munmap(nmem, size); - if (ret == -1) { - printSystemError(errno); - } - - return ret; + tmem = mmap(start, size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0); + if (tmem == MAP_FAILED) { + PrintSystemError(errno); + munmap(nmem, size); + return -1; + } + + ret = madvise(tmem, size, MADV_HUGEPAGE); + if (ret == -1) { + PrintSystemError(errno); + ret = munmap(tmem, size); + if (ret == -1) { + PrintSystemError(errno); + } + ret = munmap(nmem, size); + if (ret == -1) { + PrintSystemError(errno); } - // This is the primary API called from main - int map_static_code_to_large_pages() { - struct TextRegion r = find_node_text_region(); - if (r.found_text_region == false) { - fprintf(stderr, "Hugepages WARNING: failed to find text region \n"); - return -1; - } - - if (r.to <= reinterpret_cast (&move_text_region_to_large_pages)) - return move_text_region_to_large_pages(r); + return -1; + } - return -1; + memcpy(start, nmem, size); + ret = mprotect(start, size, PROT_READ | PROT_EXEC); + if (ret == -1) { + PrintSystemError(errno); + ret = munmap(tmem, size); + if (ret == -1) { + PrintSystemError(errno); } - - bool 
IsLargePagesEnabled() { - return IsTransparentHugePagesEnabled(); + ret = munmap(nmem, size); + if (ret == -1) { + PrintSystemError(errno); } + return -1; + } + + // Release the old/temporary mapped region + ret = munmap(nmem, size); + if (ret == -1) { + PrintSystemError(errno); + } + + return ret; +} + +// This is the primary API called from main +int MapStaticCodeToLargePages() { + struct text_region r = FindNodeTextRegion(); + if (r.found_text_region == false) { + fprintf(stderr, "Hugepages WARNING: failed to find text region \n"); + return -1; + } + + if (r.to <= reinterpret_cast (&MoveTextRegionToLargePages)) + return MoveTextRegionToLargePages(r); + + return -1; +} + +bool IsLargePagesEnabled() { + return IsTransparentHugePagesEnabled(); +} } // namespace node diff --git a/src/node_large_page.h b/src/node_large_page.h index 711807453b90d0..231238dc56c520 100644 --- a/src/node_large_page.h +++ b/src/node_large_page.h @@ -25,7 +25,7 @@ namespace node { bool IsLargePagesEnabled(); - int map_static_code_to_large_pages(); + int MapStaticCodeToLargePages(); } // namespace node #endif // SRC_NODE_LARGE_PAGE_H_ From 9828036e20a01ee9c641ab04af30ecbe29cd653c Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 7 Jun 2018 11:27:58 -0700 Subject: [PATCH 20/24] Eliminate ld.script and use implicit script GNU ld has the INSERT option, which makes an externally supplied script augment the default linker script instead of overriding it. The new ld.implicit.script does just that, so no modification to the default script is necessary anymore. 
PROVIDE (_nodetext = .); PROVIDE (nodetext = .); INSERT BEFORE .text; --- ld.implicit.script | 5 + ld.script | 233 --------------------------------------------- node.gypi | 2 +- 3 files changed, 6 insertions(+), 234 deletions(-) create mode 100644 ld.implicit.script delete mode 100644 ld.script diff --git a/ld.implicit.script b/ld.implicit.script new file mode 100644 index 00000000000000..50d3209f0df82a --- /dev/null +++ b/ld.implicit.script @@ -0,0 +1,5 @@ + PROVIDE (__nodetext = .); + PROVIDE (_nodetext = .); + PROVIDE (nodetext = .); + INSERT BEFORE .text; + diff --git a/ld.script b/ld.script deleted file mode 100644 index a75325802a2438..00000000000000 --- a/ld.script +++ /dev/null @@ -1,233 +0,0 @@ -/* Script for -z combreloc: combine and sort reloc sections */ -/* Copyright (C) 2014-2015 Free Software Foundation, Inc. - Copying and distribution of this script, with or without modification, - are permitted in any medium without royalty provided the copyright - notice and this notice are preserved. */ -OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", - "elf64-x86-64") -OUTPUT_ARCH(i386:x86-64) -ENTRY(_start) -SEARCH_DIR("=/usr/local/lib/x86_64-linux-gnu"); SEARCH_DIR("=/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib"); SEARCH_DIR("=/usr/x86_64-linux-gnu/lib64"); SEARCH_DIR("=/usr/x86_64-linux-gnu/lib"); -SECTIONS -{ - /* Read-only sections, merged into text segment: */ - PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . 
= SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS; - .interp : { *(.interp) } - .note.gnu.build-id : { *(.note.gnu.build-id) } - .hash : { *(.hash) } - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - .rela.dyn : - { - *(.rela.init) - *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) - *(.rela.fini) - *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) - *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) - *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) - *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) - *(.rela.ctors) - *(.rela.dtors) - *(.rela.got) - *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) - *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.*) - *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.*) - *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.*) - *(.rela.ifunc) - } - .rela.plt : - { - *(.rela.plt) - PROVIDE_HIDDEN (__rela_iplt_start = .); - *(.rela.iplt) - PROVIDE_HIDDEN (__rela_iplt_end = .); - } - .init : - { - KEEP (*(SORT_NONE(.init))) - } - .plt : { *(.plt) *(.iplt) } -.plt.got : { *(.plt.got) } -.plt.bnd : { *(.plt.bnd) } - PROVIDE (__nodetext = .); - PROVIDE (_nodetext = .); - PROVIDE (nodetext = .); - .text : - { - *(.text.unlikely .text.*_unlikely .text.unlikely.*) - *(.text.exit .text.exit.*) - *(.text.startup .text.startup.*) - *(.text.hot .text.hot.*) - *(.text .stub .text.* .gnu.linkonce.t.*) - /* .gnu.warning sections are handled specially by elf32.em. 
*/ - *(.gnu.warning) - } - .fini : - { - KEEP (*(SORT_NONE(.fini))) - } - PROVIDE (__etext = .); - PROVIDE (_etext = .); - PROVIDE (etext = .); - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } - .rodata1 : { *(.rodata1) } - .eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) } - .eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) } - .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table - .gcc_except_table.*) } - .gnu_extab : ONLY_IF_RO { *(.gnu_extab*) } - /* These sections are generated by the Sun/Oracle C++ compiler. */ - .exception_ranges : ONLY_IF_RO { *(.exception_ranges - .exception_ranges*) } - /* Adjust the address for the data segment. We want to adjust up to - the same address within the page on the next page up. */ - . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE)); - /* Exception handling */ - .eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) } - .gnu_extab : ONLY_IF_RW { *(.gnu_extab) } - .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) } - .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) } - /* Thread Local Storage sections */ - .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } - .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } - .preinit_array : - { - PROVIDE_HIDDEN (__preinit_array_start = .); - KEEP (*(.preinit_array)) - PROVIDE_HIDDEN (__preinit_array_end = .); - } - .init_array : - { - PROVIDE_HIDDEN (__init_array_start = .); - KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) - KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors)) - PROVIDE_HIDDEN (__init_array_end = .); - } - .fini_array : - { - PROVIDE_HIDDEN (__fini_array_start = .); - KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*))) - KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors)) - PROVIDE_HIDDEN (__fini_array_end = .); - } 
- .ctors : - { - /* gcc uses crtbegin.o to find the start of - the constructors, so we make sure it is - first. Because this is a wildcard, it - doesn't matter if the user does not - actually link against crtbegin.o; the - linker won't look for a file to match a - wildcard. The wildcard also means that it - doesn't matter which directory crtbegin.o - is in. */ - KEEP (*crtbegin.o(.ctors)) - KEEP (*crtbegin?.o(.ctors)) - /* We don't want to include the .ctor section from - the crtend.o file until after the sorted ctors. - The .ctor section from the crtend file contains the - end of ctors marker and it must be last */ - KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors)) - KEEP (*(SORT(.ctors.*))) - KEEP (*(.ctors)) - } - .dtors : - { - KEEP (*crtbegin.o(.dtors)) - KEEP (*crtbegin?.o(.dtors)) - KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors)) - KEEP (*(SORT(.dtors.*))) - KEEP (*(.dtors)) - } - .jcr : { KEEP (*(.jcr)) } - .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) } - .dynamic : { *(.dynamic) } - .got : { *(.got) *(.igot) } - . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .); - .got.plt : { *(.got.plt) *(.igot.plt) } - .data : - { - *(.data .data.* .gnu.linkonce.d.*) - SORT(CONSTRUCTORS) - } - .data1 : { *(.data1) } - _edata = .; PROVIDE (edata = .); - . = .; - __bss_start = .; - .bss : - { - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) - *(COMMON) - /* Align here to ensure that the .bss section occupies space up to - _end. Align after .bss to ensure correct alignment even if the - .bss section disappears because there are no input sections. - FIXME: Why do we need it? When there is no .bss section, we don't - pad the .data section. */ - . = ALIGN(. != 0 ? 64 / 8 : 1); - } - .lbss : - { - *(.dynlbss) - *(.lbss .lbss.* .gnu.linkonce.lb.*) - *(LARGE_COMMON) - } - . = ALIGN(64 / 8); - . 
= SEGMENT_START("ldata-segment", .); - .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : - { - *(.lrodata .lrodata.* .gnu.linkonce.lr.*) - } - .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : - { - *(.ldata .ldata.* .gnu.linkonce.l.*) - . = ALIGN(. != 0 ? 64 / 8 : 1); - } - . = ALIGN(64 / 8); - _end = .; PROVIDE (end = .); - . = DATA_SEGMENT_END (.); - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - /* DWARF debug sections. - Symbols in the DWARF debugging sections are relative to the beginning - of the section so we begin them at 0. */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* DWARF 3 */ - .debug_pubtypes 0 : { *(.debug_pubtypes) } - .debug_ranges 0 : { *(.debug_ranges) } - /* DWARF Extension. 
*/ - .debug_macro 0 : { *(.debug_macro) } - .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) } -} diff --git a/node.gypi b/node.gypi index 106d2f95f95f11..8fe15eb7cd7b85 100644 --- a/node.gypi +++ b/node.gypi @@ -192,7 +192,7 @@ and target_arch=="x64" \ and node_use_large_pages=="true"', { 'ldflags': [ - '-Wl,-T <(PRODUCT_DIR)/../../ld.script', + '-Wl,-T <(PRODUCT_DIR)/../../ld.implicit.script', '-Wl,--whole-archive,<(obj_dir)/deps/uv/<(STATIC_LIB_PREFIX)' 'uv<(STATIC_LIB_SUFFIX)', '-Wl,--no-whole-archive', From 600cf54b24ad00eecdd1fff4bd977a5e5883d6c3 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 7 Jun 2018 11:49:20 -0700 Subject: [PATCH 21/24] Add #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS --- src/node_large_page.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/node_large_page.h b/src/node_large_page.h index 231238dc56c520..e607bbaa50a085 100644 --- a/src/node_large_page.h +++ b/src/node_large_page.h @@ -23,10 +23,14 @@ #ifndef SRC_NODE_LARGE_PAGE_H_ #define SRC_NODE_LARGE_PAGE_H_ +#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + + namespace node { bool IsLargePagesEnabled(); int MapStaticCodeToLargePages(); } // namespace node +#endif // NODE_WANT_INTERNALS #endif // SRC_NODE_LARGE_PAGE_H_ From bf259e27fc2b13531e00c62073ec5f553f463afe Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 7 Jun 2018 12:10:45 -0700 Subject: [PATCH 22/24] Fix the test failures The tests were failing due to the libgcc_s unwinder getting confused. I was able to debug this problem; linking against and using libunwind didn't have any issues. The function MoveTextRegionToLargePages was initially being placed in the .eh_frame section, and this was causing the tests to fail. I now create a new stub section (before .text) in the implicit script, place MoveTextRegionToLargePages in that section, and also ensure that it is ahead of the region we are moving. 
gdb node info file 0x0000000000a00000 - 0x0000000000a001d3 is .lpstub 0x0000000000a01000 - 0x000000000182a279 is .text --- ld.implicit.script | 3 +++ src/node_large_page.cc | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ld.implicit.script b/ld.implicit.script index 50d3209f0df82a..ad7ce1b2e2e5cf 100644 --- a/ld.implicit.script +++ b/ld.implicit.script @@ -1,3 +1,6 @@ + SECTIONS { + .lpstub : { *(.lpstub) } + } PROVIDE (__nodetext = .); PROVIDE (_nodetext = .); PROVIDE (nodetext = .); diff --git a/src/node_large_page.cc b/src/node_large_page.cc index cd81feb7dc6a18..8b0e8769c2273f 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -158,7 +158,7 @@ static bool IsTransparentHugePagesEnabled() { // 3. madvise with MADV_HUGE_PAGE // 3. If successful copy the code there and unmap the original region int -__attribute__((__section__(".eh_frame"))) +__attribute__((__section__(".lpstub"))) __attribute__((__aligned__(2 * 1024 * 1024))) __attribute__((__noinline__)) __attribute__((__optimize__("2"))) @@ -240,7 +240,7 @@ int MapStaticCodeToLargePages() { return -1; } - if (r.to <= reinterpret_cast (&MoveTextRegionToLargePages)) + if (r.from > reinterpret_cast (&MoveTextRegionToLargePages)) return MoveTextRegionToLargePages(r); return -1; From f0a6dcb989c5c1ee9ea9748b4b7de49bd4b95f0f Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Fri, 8 Jun 2018 16:25:30 -0700 Subject: [PATCH 23/24] Add detailed help message to configure --with-largepages --- configure | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configure b/configure index ad5b525299f324..e1a20381bf4fd0 100755 --- a/configure +++ b/configure @@ -395,7 +395,8 @@ parser.add_option('--with-etw', parser.add_option('--use-largepages', action='store_true', dest='node_use_large_pages', - help='build with Large Pages support (enabled only for Linux).') + help='build with Large Pages support (enabled only for Linux).' 
+ + '(Needs Linux kernel >= 2.6.38 with Transparent Huge pages enabled)') intl_optgroup.add_option('--with-intl', action='store', From 461079359a3fcda9692e78711f28e244fa7784dc Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Tue, 12 Jun 2018 16:48:24 -0700 Subject: [PATCH 24/24] Fix style issues and created inline functions instead of macros for alignment --- src/node_large_page.cc | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/node_large_page.cc b/src/node_large_page.cc index 8b0e8769c2273f..4a910dc392f4f1 100644 --- a/src/node_large_page.cc +++ b/src/node_large_page.cc @@ -58,9 +58,6 @@ extern char __etext; extern char __nodetext; namespace node { -#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) -#define PAGE_ALIGN_UP(x, a) ALIGN(x, a) -#define PAGE_ALIGN_DOWN(x, a) ((x) & ~((a) - 1)) struct text_region { char* from; @@ -74,6 +71,14 @@ static void PrintSystemError(int error) { return; } +inline int64_t hugepage_align_up(int64_t addr) { + const size_t hps = 2L * 1024 * 1024; + return (((addr) + (hps) - 1) & ~((hps) - 1)); +} +inline int64_t hugepage_align_down(int64_t addr) { + const size_t hps = 2L * 1024 * 1024; + return ((addr) & ~((hps) - 1)); +} // The format of the maps file is the following // address perms offset dev inode pathname // 00400000-00452000 r-xp 00000000 08:02 173521 /usr/bin/dbus-daemon @@ -105,8 +110,8 @@ static struct text_region FindNodeTextRegion() { if (permission.compare("r-xp") == 0) { start = reinterpret_cast(&__nodetext); - char* from = reinterpret_castPAGE_ALIGN_UP(start, hps); - char* to = reinterpret_castPAGE_ALIGN_DOWN(end, hps); + char* from = reinterpret_cast(hugepage_align_up(start)); + char* to = reinterpret_cast(hugepage_align_down(end)); if (from < to) { size_t size = to - from; @@ -240,13 +245,13 @@ int MapStaticCodeToLargePages() { return -1; } - if (r.from > reinterpret_cast (&MoveTextRegionToLargePages)) + if (r.from > reinterpret_cast (&MoveTextRegionToLargePages)) 
return MoveTextRegionToLargePages(r); return -1; } -bool IsLargePagesEnabled() { +bool IsLargePagesEnabled() { return IsTransparentHugePagesEnabled(); }