Skip to content
Snippets Groups Projects
Commit b2d955b3 authored by Recolic Keghart's avatar Recolic Keghart
Browse files

Merge branch 'performance-tune' into 'pa2'

Performance tune

See merge request !2
parents 6f8b0120 461d6a49
No related branches found
No related tags found
1 merge request!2Performance tune
Pipeline #813 passed with stages
in 10 minutes and 47 seconds
image: archlinux/base image: recolic/cxx-toolkit
stages: stages:
- build - build
...@@ -7,9 +7,9 @@ stages: ...@@ -7,9 +7,9 @@ stages:
build: build:
stage: build stage: build
script: script:
- pacman -Sy && pacman -S --noconfirm bison flex gcc make gettext sdl2 lib32-glibc grep - pacman -Sy && pacman -S --noconfirm bison flex gettext sdl2 lib32-glibc grep
- export AM_HOME=$(pwd)/nexus-am/ - export AM_HOME=$(pwd)/nexus-am/
- cd nemu && make - cd nemu && make EXTRA_FLAGS='-DDISABLE_MMIO'
- show_log=1 ./runall.sh &> testcases.log ; echo $? > testres.log - show_log=1 ./runall.sh &> testcases.log ; echo $? > testres.log
artifacts: artifacts:
paths: paths:
...@@ -17,6 +17,12 @@ build: ...@@ -17,6 +17,12 @@ build:
- nemu/testcases.log - nemu/testcases.log
expire_in: 1 week expire_in: 1 week
build-icc-pgo:
stage: build
script:
- pacman -Sy && pacman -S --noconfirm bison flex gettext sdl2 lib32-glibc grep
- export AM_HOME=$(pwd)/nexus-am/
- cd nemu && ./icc-build.sh
test: test:
stage: test stage: test
......
...@@ -19,8 +19,9 @@ include Makefile.git ...@@ -19,8 +19,9 @@ include Makefile.git
CXX ?= g++ CXX ?= g++
LD = $(CXX) LD = $(CXX)
INCLUDES = $(addprefix -I, $(INC_DIR)) INCLUDES = $(addprefix -I, $(INC_DIR))
CFLAGS += -O2 -MMD -Wall -ggdb3 $(INCLUDES) -fomit-frame-pointer -std=c++17 CFLAGS += -O3 -MMD -Wall $(INCLUDES) -fomit-frame-pointer -std=c++17
CFLAGS += -DDIFF_TEST_QEMU CFLAGS += $(EXTRA_FLAGS)
# CFLAGS += -DDIFF_TEST_QEMU
# Source code generation before any targets. # Source code generation before any targets.
SUBDIRS = src/monitor/debug/expr_impl SUBDIRS = src/monitor/debug/expr_impl
...@@ -53,7 +54,7 @@ NEMU_EXEC := $(BINARY) $(ARGS) ...@@ -53,7 +54,7 @@ NEMU_EXEC := $(BINARY) $(ARGS)
$(BINARY): $(OBJS) $(BINARY): $(OBJS)
$(call git_commit, "compile") $(call git_commit, "compile")
@echo + LD $@ @echo + LD $@
@$(LD) -O2 -rdynamic $(SO_LDLAGS) -o $@ $^ -lSDL2 -lreadline -ldl @$(LD) -O2 -rdynamic $(SO_LDLAGS) -o $@ $^ -lSDL2 -lreadline -ldl -pthread
run: $(BINARY) run: $(BINARY)
$(call git_commit, "run") $(call git_commit, "run")
......
#!/bin/bash
# Profile-guided (PGO) build of NEMU with the Intel C++ compiler (icpc).
#
# Steps, each of which must succeed before the next one runs:
#   1. clean, then build an instrumented binary (-prof-gen);
#   2. build the microbench app for ARCH=x86-nemu and run it with the
#      instrumented build/nemu (-b) so that profile data is collected;
#   3. clean, then rebuild using the collected profile (-prof-use).
#
# Requires AM_HOME to be set (the CI job exports it as <repo>/nexus-am/).
# Exit status: that of the first failing step, or 0 if every step succeeds.

# Fail early with a clear message; otherwise an unset AM_HOME silently turns
# the paths below into "/apps/microbench".
: "${AM_HOME:?AM_HOME must be set to the nexus-am directory}"

xflags="-no-ansi-alias -DDISABLE_MMIO -no-complex-limited-range -qopt-prefetch=2"

# Number of parallel make jobs. Fall back to 1 when /proc/cpuinfo is missing or
# lists no processors: "-j0" is rejected by make and a bare "-j" is unbounded.
cpus=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
[ "${cpus:-0}" -gt 0 ] 2>/dev/null || cpus=1

make clean &&
    make EXTRA_FLAGS="$xflags -prof-gen" CXX=icpc -j"$cpus" &&
    make -C "$AM_HOME/apps/microbench" ARCH=x86-nemu &&
    build/nemu -b "$AM_HOME/apps/microbench/build/microbench-x86-nemu.bin" &&
    make clean &&
    make EXTRA_FLAGS="$xflags -prof-use" CXX=icpc -j"$cpus"
exit $?
#ifndef __COMMON_H__ #ifndef __COMMON_H__
#define __COMMON_H__ #define __COMMON_H__
#define DEBUG //#define DEBUG
//#define DIFF_TEST //#define DIFF_TEST
#if _SHARE #if _SHARE
......
...@@ -15,4 +15,7 @@ ...@@ -15,4 +15,7 @@
#define RLIB_MACRO_DEBUG_ASSERT(expr) #define RLIB_MACRO_DEBUG_ASSERT(expr)
#endif #endif
// Branch-prediction hints for conditions (GCC/Clang/ICC builtin).
// The argument is normalised with !! so that any truthy value (not only 1)
// matches the expected constant, and so that pointer arguments are accepted.
// Both macros evaluate to 1 when x is truthy and 0 otherwise.
#define RLIB_MACRO_LIKELY(x) __builtin_expect(!!(x),1)
#define RLIB_MACRO_UNLIKELY(x) __builtin_expect(!!(x),0)
#endif #endif
...@@ -167,11 +167,11 @@ namespace pretty_print ...@@ -167,11 +167,11 @@ namespace pretty_print
template <typename T, typename TChar, typename TCharTraits, typename TDelimiters> template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
template <typename T1, typename T2> template <typename T1, typename T2>
struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<std::pair<T1, T2>> struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<::std::pair<T1, T2>>
{ {
using ostream_type = typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type; using ostream_type = typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type;
static void print_body(const std::pair<T1, T2> & c, ostream_type & stream) static void print_body(const ::std::pair<T1, T2> & c, ostream_type & stream)
{ {
stream << c.first; stream << c.first;
if (print_container_helper<T, TChar, TCharTraits, TDelimiters>::delimiters_type::values.delimiter != NULL) if (print_container_helper<T, TChar, TCharTraits, TDelimiters>::delimiters_type::values.delimiter != NULL)
...@@ -184,10 +184,10 @@ namespace pretty_print ...@@ -184,10 +184,10 @@ namespace pretty_print
template <typename T, typename TChar, typename TCharTraits, typename TDelimiters> template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
template <typename ...Args> template <typename ...Args>
struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<std::tuple<Args...>> struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<::std::tuple<Args...>>
{ {
using ostream_type = typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type; using ostream_type = typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type;
using element_type = std::tuple<Args...>; using element_type = ::std::tuple<Args...>;
template <std::size_t I> struct Int { }; template <std::size_t I> struct Int { };
...@@ -249,10 +249,10 @@ namespace pretty_print ...@@ -249,10 +249,10 @@ namespace pretty_print
struct is_container<std::valarray<T>> : std::true_type { }; struct is_container<std::valarray<T>> : std::true_type { };
template <typename T1, typename T2> template <typename T1, typename T2>
struct is_container<std::pair<T1, T2>> : std::true_type { }; struct is_container<::std::pair<T1, T2>> : std::true_type { };
template <typename ...Args> template <typename ...Args>
struct is_container<std::tuple<Args...>> : std::true_type { }; struct is_container<::std::tuple<Args...>> : std::true_type { };
// Default delimiters // Default delimiters
...@@ -316,13 +316,13 @@ namespace pretty_print ...@@ -316,13 +316,13 @@ namespace pretty_print
// Delimiters for pair and tuple // Delimiters for pair and tuple
template <typename T1, typename T2> struct delimiters<std::pair<T1, T2>, char> { static const delimiters_values<char> values; }; template <typename T1, typename T2> struct delimiters<::std::pair<T1, T2>, char> { static const delimiters_values<char> values; };
template <typename T1, typename T2> const delimiters_values<char> delimiters<std::pair<T1, T2>, char>::values = { "(", ", ", ")" }; template <typename T1, typename T2> const delimiters_values<char> delimiters<::std::pair<T1, T2>, char>::values = { "(", ", ", ")" };
template <typename T1, typename T2> struct delimiters< ::std::pair<T1, T2>, wchar_t> { static const delimiters_values<wchar_t> values; }; template <typename T1, typename T2> struct delimiters< ::std::pair<T1, T2>, wchar_t> { static const delimiters_values<wchar_t> values; };
template <typename T1, typename T2> const delimiters_values<wchar_t> delimiters< ::std::pair<T1, T2>, wchar_t>::values = { L"(", L", ", L")" }; template <typename T1, typename T2> const delimiters_values<wchar_t> delimiters< ::std::pair<T1, T2>, wchar_t>::values = { L"(", L", ", L")" };
template <typename ...Args> struct delimiters<std::tuple<Args...>, char> { static const delimiters_values<char> values; }; template <typename ...Args> struct delimiters<::std::tuple<Args...>, char> { static const delimiters_values<char> values; };
template <typename ...Args> const delimiters_values<char> delimiters<std::tuple<Args...>, char>::values = { "(", ", ", ")" }; template <typename ...Args> const delimiters_values<char> delimiters<::std::tuple<Args...>, char>::values = { "(", ", ", ")" };
template <typename ...Args> struct delimiters< ::std::tuple<Args...>, wchar_t> { static const delimiters_values<wchar_t> values; }; template <typename ...Args> struct delimiters< ::std::tuple<Args...>, wchar_t> { static const delimiters_values<wchar_t> values; };
template <typename ...Args> const delimiters_values<wchar_t> delimiters< ::std::tuple<Args...>, wchar_t>::values = { L"(", L", ", L")" }; template <typename ...Args> const delimiters_values<wchar_t> delimiters< ::std::tuple<Args...>, wchar_t>::values = { L"(", L", ", L")" };
......
...@@ -11,7 +11,7 @@ namespace rlib { ...@@ -11,7 +11,7 @@ namespace rlib {
return (int8_t)val; return (int8_t)val;
else if constexpr(BytesCount == 2) else if constexpr(BytesCount == 2)
return (int16_t)val; return (int16_t)val;
else return val; return val;
} }
} }
......
...@@ -14,10 +14,8 @@ typedef struct { ...@@ -14,10 +14,8 @@ typedef struct {
#define EMPTY EX(inv) #define EMPTY EX(inv)
static inline void set_width(int width) { static inline void set_width(int width) {
if (width == 0) { const auto tmp = width == 0 ? (decoding.is_operand_size_16 ? 2 : 4) : width;
width = decoding.is_operand_size_16 ? 2 : 4; decoding.src.width = decoding.dest.width = decoding.src2.width = tmp;
}
decoding.src.width = decoding.dest.width = decoding.src2.width = width;
} }
/* Instruction Decode and EXecute */ /* Instruction Decode and EXecute */
...@@ -211,7 +209,7 @@ namespace EHelperImpl { ...@@ -211,7 +209,7 @@ namespace EHelperImpl {
idex(eip, &opcode_table[opcode]); idex(eip, &opcode_table[opcode]);
} }
make_EHelper(real) { __attribute__((hot)) make_EHelper(real) {
uint32_t opcode = instr_fetch(eip, 1); uint32_t opcode = instr_fetch(eip, 1);
decoding.opcode = opcode; decoding.opcode = opcode;
set_width(opcode_table[opcode].width); set_width(opcode_table[opcode].width);
......
...@@ -6,13 +6,16 @@ ...@@ -6,13 +6,16 @@
#include <signal.h> #include <signal.h>
#include <SDL2/SDL.h> #include <SDL2/SDL.h>
#include <thread>
#include <atomic>
#define TIMER_HZ 100 #define TIMER_HZ 100
#define VGA_HZ 50 #define VGA_HZ 50
static uint64_t jiffy = 0; static uint64_t jiffy = 0;
static struct itimerval it; static struct itimerval it;
static int device_update_flag = false; static std::atomic<bool> device_update_flag(false);
static int update_screen_flag = false; static std::atomic<bool> update_screen_flag(false);
void init_serial(); void init_serial();
void init_timer(); void init_timer();
...@@ -37,12 +40,9 @@ static void timer_sig_handler(int signum) { ...@@ -37,12 +40,9 @@ static void timer_sig_handler(int signum) {
Assert(ret == 0, "Can not set timer"); Assert(ret == 0, "Can not set timer");
} }
void device_update() { void device_update() {} // Now an independent thread will do it.
if (!device_update_flag) {
return;
}
device_update_flag = false;
void device_update_impl() {
if (update_screen_flag) { if (update_screen_flag) {
update_screen(); update_screen();
update_screen_flag = false; update_screen_flag = false;
...@@ -72,6 +72,16 @@ void device_update() { ...@@ -72,6 +72,16 @@ void device_update() {
} }
} }
// Entry point of the detached device-update thread started by init_device().
// Never returns: it repeatedly consumes device_update_flag and, whenever the
// flag was set, runs one device_update_impl() pass.
static void device_update_thread_daemon() {
  // Sleeping 1 ms per iteration caps the polling rate at roughly 1000 passes
  // per second.
  const auto poll_interval = std::chrono::milliseconds(1);

  for (;;) {
    // exchange() reads and clears the flag in one atomic step.
    const bool update_pending = device_update_flag.exchange(false);
    if (update_pending) {
      device_update_impl();
    }
    std::this_thread::sleep_for(poll_interval);
  }
}
void sdl_clear_event_queue() { void sdl_clear_event_queue() {
SDL_Event event; SDL_Event event;
while (SDL_PollEvent(&event)); while (SDL_PollEvent(&event));
...@@ -93,6 +103,8 @@ void init_device() { ...@@ -93,6 +103,8 @@ void init_device() {
it.it_value.tv_usec = 1000000 / TIMER_HZ; it.it_value.tv_usec = 1000000 / TIMER_HZ;
ret = setitimer(ITIMER_VIRTUAL, &it, NULL); ret = setitimer(ITIMER_VIRTUAL, &it, NULL);
Assert(ret == 0, "Can not set timer"); Assert(ret == 0, "Can not set timer");
std::thread(device_update_thread_daemon).detach();
} }
#else #else
......
...@@ -33,9 +33,8 @@ void* add_mmio_map(paddr_t addr, int len, mmio_callback_t callback) { ...@@ -33,9 +33,8 @@ void* add_mmio_map(paddr_t addr, int len, mmio_callback_t callback) {
} }
/* bus interface */ /* bus interface */
int is_mmio(paddr_t addr) { __attribute__((hot)) int is_mmio(paddr_t addr) {
int i; for (int i = 0; i < nr_map; i ++) {
for (i = 0; i < nr_map; i ++) {
if (addr >= maps[i].low && addr <= maps[i].high) { if (addr >= maps[i].low && addr <= maps[i].high) {
return i; return i;
} }
......
...@@ -18,16 +18,15 @@ static SDL_Texture *texture; ...@@ -18,16 +18,15 @@ static SDL_Texture *texture;
static uint32_t (*vmem) [SCREEN_W]; static uint32_t (*vmem) [SCREEN_W];
static uint32_t *screensize_port_base; static uint32_t *screensize_port_base;
void update_screen() { inline void SDL_ErrorCheck(int ret) {
SDL_UpdateTexture(texture, NULL, vmem, SCREEN_W * sizeof(vmem[0][0])); if(ret != 0) {
SDL_RenderClear(renderer); rlib::println("SDL_Error: ret=", ret, ", GETERR=", SDL_GetError());
SDL_RenderCopy(renderer, texture, NULL, NULL); }
SDL_RenderPresent(renderer);
} }
void init_vga() { static void init_vga_impl() {
SDL_Init(SDL_INIT_VIDEO); SDL_ErrorCheck(SDL_Init(SDL_INIT_VIDEO));
SDL_CreateWindowAndRenderer(SCREEN_W * 2, SCREEN_H * 2, 0, &window, &renderer); SDL_ErrorCheck(SDL_CreateWindowAndRenderer(SCREEN_W * 2, SCREEN_H * 2, 0, &window, &renderer));
SDL_SetWindowTitle(window, "NEMU"); SDL_SetWindowTitle(window, "NEMU");
texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888,
SDL_TEXTUREACCESS_STATIC, SCREEN_W, SCREEN_H); SDL_TEXTUREACCESS_STATIC, SCREEN_W, SCREEN_H);
...@@ -36,4 +35,20 @@ void init_vga() { ...@@ -36,4 +35,20 @@ void init_vga() {
*screensize_port_base = ((SCREEN_W) << 16) | (SCREEN_H); *screensize_port_base = ((SCREEN_W) << 16) | (SCREEN_H);
vmem = reinterpret_cast<decltype(vmem)>(add_mmio_map(VMEM, 0x80000, nullptr)); vmem = reinterpret_cast<decltype(vmem)>(add_mmio_map(VMEM, 0x80000, nullptr));
} }
// Redraw the SDL window from the contents of vmem.
// When DISABLE_MMIO is defined this function compiles to a no-op.
// The window/renderer/texture are created on the first call (window is still
// null then), so SDL initialisation runs on whichever thread calls this.
// Failures of the SDL calls are reported through SDL_ErrorCheck and otherwise
// ignored.
void update_screen() {
#ifndef DISABLE_MMIO
  if (nullptr == window) {
    init_vga_impl();
  }

  // Number of bytes in one row of vmem.
  const auto row_bytes = SCREEN_W * sizeof(vmem[0][0]);

  const int upload_rc = SDL_UpdateTexture(texture, NULL, vmem, row_bytes);
  SDL_ErrorCheck(upload_rc);

  const int clear_rc = SDL_RenderClear(renderer);
  SDL_ErrorCheck(clear_rc);

  const int copy_rc = SDL_RenderCopy(renderer, texture, NULL, NULL);
  SDL_ErrorCheck(copy_rc);

  SDL_RenderPresent(renderer);
#endif
}
// Intentionally empty; kept so existing callers of init_vga() still link.
// The real VGA/SDL setup lives in init_vga_impl(), which update_screen()
// invokes lazily on its first call, i.e. on the device-update thread rather
// than on the main thread.
void init_vga() {
// Per the original author's note, SDL's design requires the VGA
// initialisation to happen in the same thread that updates the screen.
// Do nothing in main thread.
}
#endif /* HAS_IOE */ #endif /* HAS_IOE */
#include "nemu.h" #include "nemu.h"
#include "device/mmio.h"
#define PMEM_SIZE (128 * 1024 * 1024) #define PMEM_SIZE (128 * 1024 * 1024)
...@@ -11,13 +12,34 @@ uint8_t pmem[PMEM_SIZE]; ...@@ -11,13 +12,34 @@ uint8_t pmem[PMEM_SIZE];
/* Memory accessing interfaces */ /* Memory accessing interfaces */
uint32_t paddr_read(paddr_t addr, int len) { __attribute__((hot)) uint32_t paddr_read(paddr_t addr, int len) {
return pmem_rw(addr, uint32_t) & (~0u >> ((4 - len) << 3)); static const uint32_t niddle[] = {0, 0xff, 0xffff, 0xffffff, 0xffffffff};
#ifndef DISABLE_MMIO
if(const auto mmio_id = is_mmio(addr); RLIB_MACRO_LIKELY(-1 == mmio_id)) {
#endif
return pmem_rw(addr, uint32_t) & niddle[len];
#ifndef DISABLE_MMIO
}
else {
return mmio_read(addr, len, mmio_id);
}
#endif
} }
void paddr_write(paddr_t addr, uint32_t data, int len) { void paddr_write(paddr_t addr, uint32_t data, int len) {
#ifndef DISABLE_MMIO
if(const auto mmio_id = is_mmio(addr); RLIB_MACRO_LIKELY(-1 == mmio_id)) {
#endif
memcpy(guest_to_host(addr), &data, len); memcpy(guest_to_host(addr), &data, len);
#ifndef DISABLE_MMIO
} }
else {
mmio_write(addr, len, data, mmio_id);
}
#endif
}
// len is Bytes. // len is Bytes.
uint32_t vaddr_read(vaddr_t addr, int len) { uint32_t vaddr_read(vaddr_t addr, int len) {
......
...@@ -3,11 +3,15 @@ ...@@ -3,11 +3,15 @@
#include <amdev.h> #include <amdev.h>
size_t input_read(uintptr_t reg, void *buf, size_t size) { size_t input_read(uintptr_t reg, void *buf, size_t size) {
const uint32_t I8042_DATA_PORT = 0x60;
switch (reg) { switch (reg) {
case _DEVREG_INPUT_KBD: { case _DEVREG_INPUT_KBD: {
_KbdReg *kbd = (_KbdReg *)buf; _KbdReg *kbd = (_KbdReg *)buf;
kbd->keydown = 0; uint32_t press = inl(I8042_DATA_PORT);
kbd->keycode = _KEY_NONE; kbd->keycode = press;
if(press != _KEY_NONE){
kbd->keydown = !(kbd->keydown);
}
return sizeof(_KbdReg); return sizeof(_KbdReg);
} }
} }
......
...@@ -6,11 +6,13 @@ ...@@ -6,11 +6,13 @@
static uint32_t* const fb __attribute__((used)) = (uint32_t *)0x40000; static uint32_t* const fb __attribute__((used)) = (uint32_t *)0x40000;
size_t video_read(uintptr_t reg, void *buf, size_t size) { size_t video_read(uintptr_t reg, void *buf, size_t size) {
const uint32_t SCREEN_PORT = 0x100;
switch (reg) { switch (reg) {
case _DEVREG_VIDEO_INFO: { case _DEVREG_VIDEO_INFO: {
_VideoInfoReg *info = (_VideoInfoReg *)buf; _VideoInfoReg *info = (_VideoInfoReg *)buf;
info->width = 0; uint32_t screen = inl(SCREEN_PORT);
info->height = 0; info->width = screen >> 16;
info->height = screen << 16 >> 16;
return sizeof(_VideoInfoReg); return sizeof(_VideoInfoReg);
} }
} }
...@@ -21,7 +23,9 @@ size_t video_write(uintptr_t reg, void *buf, size_t size) { ...@@ -21,7 +23,9 @@ size_t video_write(uintptr_t reg, void *buf, size_t size) {
switch (reg) { switch (reg) {
case _DEVREG_VIDEO_FBCTL: { case _DEVREG_VIDEO_FBCTL: {
_FBCtlReg *ctl = (_FBCtlReg *)buf; _FBCtlReg *ctl = (_FBCtlReg *)buf;
for(int i = 0; i < ctl->h; ++i) {
memcpy(fb+(ctl->y+i)*screen_width()+ctl->x,ctl->pixels+i*ctl->w,ctl->w*4);
}
if (ctl->sync) { if (ctl->sync) {
// do nothing, hardware syncs. // do nothing, hardware syncs.
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment