Patch summary (free text ahead of the first "diff --git" header; patch tools skip it).

  * .gitlab-ci.yml: switch the CI image to recolic/cxx-toolkit, build the default job
    with EXTRA_FLAGS='-DDISABLE_MMIO', and add a build-icc-pgo job that runs
    nemu/icc-build.sh.
  * nemu/Makefile: compile with -O3 (dropping -ggdb3), append $(EXTRA_FLAGS) to CFLAGS,
    comment out -DDIFF_TEST_QEMU, and link with -pthread.
  * nemu/icc-build.sh (new): two-pass build with icpc -- build with -prof-gen, run the
    microbench image, then rebuild with -prof-use.
  * nemu/include: comment out DEBUG; add RLIB_MACRO_LIKELY / RLIB_MACRO_UNLIKELY;
    spell std::pair / std::tuple as ::std:: in prettyprint.hpp; drop a redundant `else`.
  * nemu/src/cpu, nemu/src/memory, nemu/src/device/io: mark hot functions with
    __attribute__((hot)); paddr_read / paddr_write dispatch to mmio_read / mmio_write
    unless DISABLE_MMIO is defined; paddr_read masks through a lookup table.
  * nemu/src/device: device_update() becomes a no-op and a detached thread polls
    device_update_flag every millisecond; SDL setup is deferred to the first
    update_screen() call; SDL return codes are checked and logged.
  * nexus-am x86-nemu devices: read the keyboard from port 0x60 and the screen size from
    port 0x100; copy the FBCTL rectangle into the framebuffer row by row.

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 1f8878da14e4a6d6d5803466b61314e05468e52b..6e6ffa5bdd506b815a91babdf4f08557087ee7a8 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,4 +1,4 @@
-image: archlinux/base
+image: recolic/cxx-toolkit
 
 stages:
   - build
@@ -7,9 +7,9 @@ stages:
 build:
   stage: build
   script:
-    - pacman -Sy && pacman -S --noconfirm bison flex gcc make gettext sdl2 lib32-glibc grep
+    - pacman -Sy && pacman -S --noconfirm bison flex gettext sdl2 lib32-glibc grep
     - export AM_HOME=$(pwd)/nexus-am/
-    - cd nemu && make
+    - cd nemu && make EXTRA_FLAGS='-DDISABLE_MMIO'
     - show_log=1 ./runall.sh &> testcases.log ; echo $? > testres.log
   artifacts:
     paths:
@@ -17,6 +17,12 @@ build:
       - nemu/testcases.log
     expire_in: 1 week
 
+build-icc-pgo:
+  stage: build
+  script:
+    - pacman -Sy && pacman -S --noconfirm bison flex gettext sdl2 lib32-glibc grep
+    - export AM_HOME=$(pwd)/nexus-am/
+    - cd nemu && ./icc-build.sh
 test:
   stage: test
 
diff --git a/nemu/Makefile b/nemu/Makefile
index e058045535d99d11ce3bf744ce435c0ce42fc517..8e0081d5d066a6525e5735a73d38bee0d2edf542 100644
--- a/nemu/Makefile
+++ b/nemu/Makefile
@@ -19,8 +19,9 @@ include Makefile.git
 CXX ?= g++
 LD = $(CXX)
 INCLUDES = $(addprefix -I, $(INC_DIR))
-CFLAGS += -O2 -MMD -Wall -ggdb3 $(INCLUDES) -fomit-frame-pointer -std=c++17
-CFLAGS += -DDIFF_TEST_QEMU
+CFLAGS += -O3 -MMD -Wall $(INCLUDES) -fomit-frame-pointer -std=c++17
+CFLAGS += $(EXTRA_FLAGS)
+# CFLAGS += -DDIFF_TEST_QEMU
 
 # Source code generation before any targets.
 SUBDIRS = src/monitor/debug/expr_impl
@@ -53,7 +54,7 @@ NEMU_EXEC := $(BINARY) $(ARGS)
 $(BINARY): $(OBJS)
 	$(call git_commit, "compile")
 	@echo + LD $@
-	@$(LD) -O2 -rdynamic $(SO_LDLAGS) -o $@ $^ -lSDL2 -lreadline -ldl
+	@$(LD) -O2 -rdynamic $(SO_LDLAGS) -o $@ $^ -lSDL2 -lreadline -ldl -pthread
 
 run: $(BINARY)
 	$(call git_commit, "run")
diff --git a/nemu/icc-build.sh b/nemu/icc-build.sh
new file mode 100755
index 0000000000000000000000000000000000000000..3adee30d7dca9b7e4d15d5703504fbb0b6b2c636
--- /dev/null
+++ b/nemu/icc-build.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+xflags="-no-ansi-alias -DDISABLE_MMIO -no-complex-limited-range -qopt-prefetch=2"
+cpus=$(grep -c '^processor' /proc/cpuinfo)
+
+make clean &&
+make EXTRA_FLAGS="$xflags -prof-gen" CXX=icpc -j$cpus &&
+make -C "$AM_HOME/apps/microbench" ARCH=x86-nemu &&
+build/nemu -b "$AM_HOME/apps/microbench/build/microbench-x86-nemu.bin" &&
+make clean &&
+make EXTRA_FLAGS="$xflags -prof-use" CXX=icpc -j$cpus
+
+exit $?
+
diff --git a/nemu/include/common.h b/nemu/include/common.h
index b734ebc0e3100c1c84e49fb8e93a1cb011d8eee5..83d7ae959575662c70fa4d9f20b3afeffd98cc32 100644
--- a/nemu/include/common.h
+++ b/nemu/include/common.h
@@ -1,7 +1,7 @@
 #ifndef __COMMON_H__
 #define __COMMON_H__
 
-#define DEBUG
+//#define DEBUG
 //#define DIFF_TEST
 
 #if _SHARE
diff --git a/nemu/include/macro.h b/nemu/include/macro.h
index 68b9399fb591ac99a8541e769c66d78df12e9d6b..2bc2d2b77ec74ecb6a2dde413b3d531610c84ea9 100644
--- a/nemu/include/macro.h
+++ b/nemu/include/macro.h
@@ -15,4 +15,7 @@
 #define RLIB_MACRO_DEBUG_ASSERT(expr)
 #endif
 
+#define RLIB_MACRO_LIKELY(x) __builtin_expect(!!(x),1)
+#define RLIB_MACRO_UNLIKELY(x) __builtin_expect(!!(x),0)
+
 #endif
diff --git a/nemu/include/rlib/3rdparty/prettyprint.hpp b/nemu/include/rlib/3rdparty/prettyprint.hpp
index ce2226a92b3d753826442b16ee18264e0a755839..106c275e9a7e2bd8a9af54f884fee0ba9a6efb1a 100644
--- a/nemu/include/rlib/3rdparty/prettyprint.hpp
+++ b/nemu/include/rlib/3rdparty/prettyprint.hpp
@@ -167,11 +167,11 @@ namespace pretty_print
 
     template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
     template <typename T1, typename T2>
-    struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<std::pair<T1, T2>>
+    struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<::std::pair<T1, T2>>
     {
         using ostream_type = typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type;
 
-        static void print_body(const std::pair<T1, T2> & c, ostream_type & stream)
+        static void print_body(const ::std::pair<T1, T2> & c, ostream_type & stream)
         {
             stream << c.first;
             if (print_container_helper<T, TChar, TCharTraits, TDelimiters>::delimiters_type::values.delimiter != NULL)
@@ -184,10 +184,10 @@ namespace pretty_print
 
     template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
     template <typename ...Args>
-    struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<std::tuple<Args...>>
+    struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<::std::tuple<Args...>>
     {
         using ostream_type = typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type;
-        using element_type = std::tuple<Args...>;
+        using element_type = ::std::tuple<Args...>;
 
         template <std::size_t I> struct Int { };
 
@@ -249,10 +249,10 @@ namespace pretty_print
     struct is_container<std::valarray<T>> : std::true_type { };
 
     template <typename T1, typename T2>
-    struct is_container<std::pair<T1, T2>> : std::true_type { };
+    struct is_container<::std::pair<T1, T2>> : std::true_type { };
 
     template <typename ...Args>
-    struct is_container<std::tuple<Args...>> : std::true_type { };
+    struct is_container<::std::tuple<Args...>> : std::true_type { };
 
 
     // Default delimiters
@@ -316,13 +316,13 @@ namespace pretty_print
 
     // Delimiters for pair and tuple
 
-    template <typename T1, typename T2> struct delimiters<std::pair<T1, T2>, char> { static const delimiters_values<char> values; };
-    template <typename T1, typename T2> const delimiters_values<char> delimiters<std::pair<T1, T2>, char>::values = { "(", ", ", ")" };
+    template <typename T1, typename T2> struct delimiters<::std::pair<T1, T2>, char> { static const delimiters_values<char> values; };
+    template <typename T1, typename T2> const delimiters_values<char> delimiters<::std::pair<T1, T2>, char>::values = { "(", ", ", ")" };
     template <typename T1, typename T2> struct delimiters< ::std::pair<T1, T2>, wchar_t> { static const delimiters_values<wchar_t> values; };
     template <typename T1, typename T2> const delimiters_values<wchar_t> delimiters< ::std::pair<T1, T2>, wchar_t>::values = { L"(", L", ", L")" };
 
-    template <typename ...Args> struct delimiters<std::tuple<Args...>, char> { static const delimiters_values<char> values; };
-    template <typename ...Args> const delimiters_values<char> delimiters<std::tuple<Args...>, char>::values = { "(", ", ", ")" };
+    template <typename ...Args> struct delimiters<::std::tuple<Args...>, char> { static const delimiters_values<char> values; };
+    template <typename ...Args> const delimiters_values<char> delimiters<::std::tuple<Args...>, char>::values = { "(", ", ", ")" };
     template <typename ...Args> struct delimiters< ::std::tuple<Args...>, wchar_t> { static const delimiters_values<wchar_t> values; };
     template <typename ...Args> const delimiters_values<wchar_t> delimiters< ::std::tuple<Args...>, wchar_t>::values = { L"(", L", ", L")" };
 
diff --git a/nemu/include/util/util.h b/nemu/include/util/util.h
index 330a1d97315d45346981fd79c8eb6cfe9bb7e92f..2fe56754c7a2a04a2229baa1902028b4ba93e464 100644
--- a/nemu/include/util/util.h
+++ b/nemu/include/util/util.h
@@ -11,7 +11,7 @@ namespace rlib {
             return (int8_t)val;
         else if constexpr(BytesCount == 2)
             return (int16_t)val;
-        else return val;
+        return val;
     }
 }
 
diff --git a/nemu/src/cpu/exec/exec.cc b/nemu/src/cpu/exec/exec.cc
index 3c67656de60b0c26d801a851b00a8a3dd5de09d2..f6e847abcce4ca5bc94d63abc7c389f9e799c9d6 100644
--- a/nemu/src/cpu/exec/exec.cc
+++ b/nemu/src/cpu/exec/exec.cc
@@ -14,10 +14,8 @@ typedef struct {
 #define EMPTY EX(inv)
 
 static inline void set_width(int width) {
-  if (width == 0) {
-    width = decoding.is_operand_size_16 ? 2 : 4;
-  }
-  decoding.src.width = decoding.dest.width = decoding.src2.width = width;
+  const auto tmp = width == 0 ? (decoding.is_operand_size_16 ? 2 : 4) : width;
+  decoding.src.width = decoding.dest.width = decoding.src2.width = tmp;
 }
 
 /* Instruction Decode and EXecute */
@@ -211,7 +209,7 @@ namespace EHelperImpl {
     idex(eip, &opcode_table[opcode]);
   }
 
-  make_EHelper(real) {
+  __attribute__((hot)) make_EHelper(real) {
     uint32_t opcode = instr_fetch(eip, 1);
     decoding.opcode = opcode;
     set_width(opcode_table[opcode].width);
diff --git a/nemu/src/device/device.cc b/nemu/src/device/device.cc
index 560d1b08f36209fa834ee22aa3cce70b1ede2153..b36ae401cc29a4af046bc59ab548579e0d775e9e 100644
--- a/nemu/src/device/device.cc
+++ b/nemu/src/device/device.cc
@@ -6,13 +6,16 @@
 #include <signal.h>
 #include <SDL2/SDL.h>
 
+#include <thread>
+#include <atomic>
+
 #define TIMER_HZ 100
 #define VGA_HZ 50
 
 static uint64_t jiffy = 0;
 static struct itimerval it;
-static int device_update_flag = false;
-static int update_screen_flag = false;
+static std::atomic<bool> device_update_flag(false);
+static std::atomic<bool> update_screen_flag(false);
 
 void init_serial();
 void init_timer();
@@ -37,12 +40,9 @@ static void timer_sig_handler(int signum) {
   Assert(ret == 0, "Can not set timer");
 }
 
-void device_update() {
-  if (!device_update_flag) {
-    return;
-  }
-  device_update_flag = false;
+void device_update() {} // Now an independent thread will do it.
 
+void device_update_impl() {
   if (update_screen_flag) {
     update_screen();
     update_screen_flag = false;
@@ -72,6 +72,16 @@ void device_update() {
   }
 }
 
+static void device_update_thread_daemon() {
+  while(true) {
+    if(device_update_flag.exchange(false)) {
+      device_update_impl();
+    }
+    // At most, 1000FPS
+    std::this_thread::sleep_for(std::chrono::milliseconds(1));
+  }
+}
+
 void sdl_clear_event_queue() {
   SDL_Event event;
   while (SDL_PollEvent(&event));
@@ -93,6 +103,8 @@ void init_device() {
   it.it_value.tv_usec = 1000000 / TIMER_HZ;
   ret = setitimer(ITIMER_VIRTUAL, &it, NULL);
   Assert(ret == 0, "Can not set timer");
+
+  std::thread(device_update_thread_daemon).detach();
 }
 #else
 
diff --git a/nemu/src/device/io/mmio.cc b/nemu/src/device/io/mmio.cc
index 529845fe4d038ed724a139a22c0f3f63fbea9ceb..50feaabe59f30028c4afb7d47c0fbc0435a36545 100644
--- a/nemu/src/device/io/mmio.cc
+++ b/nemu/src/device/io/mmio.cc
@@ -33,9 +33,8 @@ void* add_mmio_map(paddr_t addr, int len, mmio_callback_t callback) {
 }
 
 /* bus interface */
-int is_mmio(paddr_t addr) {
-  int i;
-  for (i = 0; i < nr_map; i ++) {
+__attribute__((hot)) int is_mmio(paddr_t addr) {
+  for (int i = 0; i < nr_map; i ++) {
     if (addr >= maps[i].low && addr <= maps[i].high) {
       return i;
     }
diff --git a/nemu/src/device/vga.cc b/nemu/src/device/vga.cc
index 66445841d2b6d50cda986da93c74f0740338f649..8095b786bb84fac392e771fc34680c39a880632c 100644
--- a/nemu/src/device/vga.cc
+++ b/nemu/src/device/vga.cc
@@ -18,16 +18,15 @@ static SDL_Texture *texture;
 static uint32_t (*vmem) [SCREEN_W];
 static uint32_t *screensize_port_base;
 
-void update_screen() {
-  SDL_UpdateTexture(texture, NULL, vmem, SCREEN_W * sizeof(vmem[0][0]));
-  SDL_RenderClear(renderer);
-  SDL_RenderCopy(renderer, texture, NULL, NULL);
-  SDL_RenderPresent(renderer);
+inline void SDL_ErrorCheck(int ret) {
+  if(ret != 0) {
+    rlib::println("SDL_Error: ret=", ret, ", GETERR=", SDL_GetError());
+  }
 }
 
-void init_vga() {
-  SDL_Init(SDL_INIT_VIDEO);
-  SDL_CreateWindowAndRenderer(SCREEN_W * 2, SCREEN_H * 2, 0, &window, &renderer);
+static void init_vga_impl() {
+  SDL_ErrorCheck(SDL_Init(SDL_INIT_VIDEO));
+  SDL_ErrorCheck(SDL_CreateWindowAndRenderer(SCREEN_W * 2, SCREEN_H * 2, 0, &window, &renderer));
   SDL_SetWindowTitle(window, "NEMU");
   texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888,
       SDL_TEXTUREACCESS_STATIC, SCREEN_W, SCREEN_H);
@@ -36,4 +35,20 @@ void init_vga() {
   *screensize_port_base = ((SCREEN_W) << 16) | (SCREEN_H);
   vmem = reinterpret_cast<decltype(vmem)>(add_mmio_map(VMEM, 0x80000, nullptr));
 }
+
+void update_screen() {
+#ifndef DISABLE_MMIO
+  if(window == nullptr) init_vga_impl();
+  SDL_ErrorCheck(SDL_UpdateTexture(texture, NULL, vmem, SCREEN_W * sizeof(vmem[0][0])));
+  SDL_ErrorCheck(SDL_RenderClear(renderer));
+  SDL_ErrorCheck(SDL_RenderCopy(renderer, texture, NULL, NULL));
+  SDL_RenderPresent(renderer);
+#endif
+}
+
+void init_vga() {
+  // SDL setup has to run on the updating thread, so update_screen() performs it on first use.
+  // Nothing to do in the main thread.
+}
+
 #endif /* HAS_IOE */
diff --git a/nemu/src/memory/memory.cc b/nemu/src/memory/memory.cc
index 0206211aa00cb15402a95ff951e9d64d1652a2d1..46be639c22a2af66d476b3a3e6b4798dfb48d34b 100644
--- a/nemu/src/memory/memory.cc
+++ b/nemu/src/memory/memory.cc
@@ -1,4 +1,5 @@
 #include "nemu.h"
+#include "device/mmio.h"
 
 #define PMEM_SIZE (128 * 1024 * 1024)
 
@@ -11,14 +12,35 @@ uint8_t pmem[PMEM_SIZE];
 
 /* Memory accessing interfaces */
 
-uint32_t paddr_read(paddr_t addr, int len) {
-  return pmem_rw(addr, uint32_t) & (~0u >> ((4 - len) << 3));
+__attribute__((hot)) uint32_t paddr_read(paddr_t addr, int len) {
+  static const uint32_t niddle[] = {0, 0xff, 0xffff, 0xffffff, 0xffffffff};
+
+#ifndef DISABLE_MMIO
+  if(const auto mmio_id = is_mmio(addr); RLIB_MACRO_LIKELY(-1 == mmio_id)) {
+#endif
+    return pmem_rw(addr, uint32_t) & niddle[len];
+#ifndef DISABLE_MMIO
+  }
+  else {
+    return mmio_read(addr, len, mmio_id);
+  }
+#endif
 }
 
 void paddr_write(paddr_t addr, uint32_t data, int len) {
-  memcpy(guest_to_host(addr), &data, len);
+#ifndef DISABLE_MMIO
+  if(const auto mmio_id = is_mmio(addr); RLIB_MACRO_LIKELY(-1 == mmio_id)) {
+#endif
+    memcpy(guest_to_host(addr), &data, len);
+#ifndef DISABLE_MMIO
+  }
+  else {
+    mmio_write(addr, len, data, mmio_id);
+  }
+#endif
 }
 
+
 // len is Bytes.
 uint32_t vaddr_read(vaddr_t addr, int len) {
   return paddr_read(addr, len);
diff --git a/nexus-am/am/arch/x86-nemu/src/devices/input.c b/nexus-am/am/arch/x86-nemu/src/devices/input.c
index a0634a778d2e26d2d6c72da0998d30e2d0cfebbc..15173f5d216813888cb8f3f5cf22a417e48ff7be 100644
--- a/nexus-am/am/arch/x86-nemu/src/devices/input.c
+++ b/nexus-am/am/arch/x86-nemu/src/devices/input.c
@@ -3,11 +3,16 @@
 #include <amdev.h>
 
 size_t input_read(uintptr_t reg, void *buf, size_t size) {
+  const uint32_t I8042_DATA_PORT = 0x60;
   switch (reg) {
     case _DEVREG_INPUT_KBD: {
       _KbdReg *kbd = (_KbdReg *)buf;
-      kbd->keydown = 0;
-      kbd->keycode = _KEY_NONE;
+      uint32_t press = inl(I8042_DATA_PORT);
+      kbd->keycode = press;
+      // NOTE(review): toggles the keydown value already in buf; confirm callers pass an initialized _KbdReg.
+      if(press != _KEY_NONE){
+        kbd->keydown = !(kbd->keydown);
+      }
       return sizeof(_KbdReg);
     }
   }
diff --git a/nexus-am/am/arch/x86-nemu/src/devices/video.c b/nexus-am/am/arch/x86-nemu/src/devices/video.c
index f56057286afd174c6f843bec4c7418f27da7110f..2baa1f5d5b7c616a550e9e1ac7c18b68f69781b3 100644
--- a/nexus-am/am/arch/x86-nemu/src/devices/video.c
+++ b/nexus-am/am/arch/x86-nemu/src/devices/video.c
@@ -6,11 +6,13 @@
 static uint32_t* const fb __attribute__((used)) = (uint32_t *)0x40000;
 
 size_t video_read(uintptr_t reg, void *buf, size_t size) {
+  const uint32_t SCREEN_PORT = 0x100;
   switch (reg) {
     case _DEVREG_VIDEO_INFO: {
       _VideoInfoReg *info = (_VideoInfoReg *)buf;
-      info->width = 0;
-      info->height = 0;
+      uint32_t screen = inl(SCREEN_PORT);
+      info->width = screen >> 16;
+      info->height = screen & 0xffff;
       return sizeof(_VideoInfoReg);
     }
   }
@@ -21,7 +23,9 @@ size_t video_write(uintptr_t reg, void *buf, size_t size) {
   switch (reg) {
     case _DEVREG_VIDEO_FBCTL: {
       _FBCtlReg *ctl = (_FBCtlReg *)buf;
-
+      for(int i = 0; i < ctl->h; ++i) {
+        memcpy(fb+(ctl->y+i)*screen_width()+ctl->x,ctl->pixels+i*ctl->w,ctl->w*4);
+      }
       if (ctl->sync) {
         // do nothing, hardware syncs.
       }