Commit b2d955b3 authored by Recolic Keghart
Browse files

Merge branch 'performance-tune' into 'pa2'

Performance tune

See merge request recolic-hust/hust-x86-simulator!2
parents 6f8b0120 461d6a49
Pipeline #813 passed with stages
in 10 minutes and 47 seconds
image: archlinux/base
image: recolic/cxx-toolkit
stages:
- build
......@@ -7,9 +7,9 @@ stages:
build:
stage: build
script:
- pacman -Sy && pacman -S --noconfirm bison flex gcc make gettext sdl2 lib32-glibc grep
- pacman -Sy && pacman -S --noconfirm bison flex gettext sdl2 lib32-glibc grep
- export AM_HOME=$(pwd)/nexus-am/
- cd nemu && make
- cd nemu && make EXTRA_FLAGS='-DDISABLE_MMIO'
- show_log=1 ./runall.sh &> testcases.log ; echo $? > testres.log
artifacts:
paths:
......@@ -17,6 +17,12 @@ build:
- nemu/testcases.log
expire_in: 1 week
build-icc-pgo:
stage: build
script:
- pacman -Sy && pacman -S --noconfirm bison flex gettext sdl2 lib32-glibc grep
- export AM_HOME=$(pwd)/nexus-am/
- cd nemu && ./icc-build.sh
test:
stage: test
......
......@@ -19,8 +19,9 @@ include Makefile.git
CXX ?= g++
LD = $(CXX)
INCLUDES = $(addprefix -I, $(INC_DIR))
CFLAGS += -O2 -MMD -Wall -ggdb3 $(INCLUDES) -fomit-frame-pointer -std=c++17
CFLAGS += -DDIFF_TEST_QEMU
CFLAGS += -O3 -MMD -Wall $(INCLUDES) -fomit-frame-pointer -std=c++17
CFLAGS += $(EXTRA_FLAGS)
# CFLAGS += -DDIFF_TEST_QEMU
# Source code generation before any targets.
SUBDIRS = src/monitor/debug/expr_impl
......@@ -53,7 +54,7 @@ NEMU_EXEC := $(BINARY) $(ARGS)
$(BINARY): $(OBJS)
$(call git_commit, "compile")
@echo + LD $@
@$(LD) -O2 -rdynamic $(SO_LDLAGS) -o $@ $^ -lSDL2 -lreadline -ldl
@$(LD) -O2 -rdynamic $(SO_LDLAGS) -o $@ $^ -lSDL2 -lreadline -ldl -pthread
run: $(BINARY)
$(call git_commit, "run")
......
#!/bin/bash
# Two-pass profile-guided build of nemu using the Intel compiler (icpc).
#
# Steps, each one running only if the previous one succeeded:
#   1. clean, then build with "-prof-gen" added to EXTRA_FLAGS
#   2. build the microbench app for ARCH=x86-nemu
#   3. run the resulting microbench image under the freshly built build/nemu
#   4. clean again, then rebuild with "-prof-use" added to EXTRA_FLAGS
#
# Expects to be started from the directory that holds the nemu Makefile
# (it calls `make` and `build/nemu` relative to the current directory) and
# needs AM_HOME to point at the nexus-am tree.
#
# The script's exit status is that of the first failing step, or 0 if every
# step succeeded.

# Stop before doing any work when AM_HOME is unset or empty; otherwise the
# microbench paths below would resolve under "/" only after a full build.
: "${AM_HOME:?AM_HOME must be set to the nexus-am directory}"

xflags="-no-ansi-alias -DDISABLE_MMIO -no-complex-limited-range -qopt-prefetch=2"

# One make job per "processor" entry listed in /proc/cpuinfo.
cpus=$(grep -c '^processor' /proc/cpuinfo)

make clean &&
make EXTRA_FLAGS="$xflags -prof-gen" CXX=icpc -j"$cpus" &&
make -C "$AM_HOME/apps/microbench" ARCH=x86-nemu &&
build/nemu -b "$AM_HOME/apps/microbench/build/microbench-x86-nemu.bin" &&
make clean &&
make EXTRA_FLAGS="$xflags -prof-use" CXX=icpc -j"$cpus"
#ifndef __COMMON_H__
#define __COMMON_H__
#define DEBUG
//#define DEBUG
//#define DIFF_TEST
#if _SHARE
......
......@@ -15,4 +15,7 @@
#define RLIB_MACRO_DEBUG_ASSERT(expr)
#endif
#define RLIB_MACRO_LIKELY(x) __builtin_expect((x),1)
#define RLIB_MACRO_UNLIKELY(x) __builtin_expect((x),0)
#endif
......@@ -167,11 +167,11 @@ namespace pretty_print
template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
template <typename T1, typename T2>
struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<std::pair<T1, T2>>
struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<::std::pair<T1, T2>>
{
using ostream_type = typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type;
static void print_body(const std::pair<T1, T2> & c, ostream_type & stream)
static void print_body(const ::std::pair<T1, T2> & c, ostream_type & stream)
{
stream << c.first;
if (print_container_helper<T, TChar, TCharTraits, TDelimiters>::delimiters_type::values.delimiter != NULL)
......@@ -184,10 +184,10 @@ namespace pretty_print
template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
template <typename ...Args>
struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<std::tuple<Args...>>
struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<::std::tuple<Args...>>
{
using ostream_type = typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type;
using element_type = std::tuple<Args...>;
using element_type = ::std::tuple<Args...>;
template <std::size_t I> struct Int { };
......@@ -249,10 +249,10 @@ namespace pretty_print
struct is_container<std::valarray<T>> : std::true_type { };
template <typename T1, typename T2>
struct is_container<std::pair<T1, T2>> : std::true_type { };
struct is_container<::std::pair<T1, T2>> : std::true_type { };
template <typename ...Args>
struct is_container<std::tuple<Args...>> : std::true_type { };
struct is_container<::std::tuple<Args...>> : std::true_type { };
// Default delimiters
......@@ -316,13 +316,13 @@ namespace pretty_print
// Delimiters for pair and tuple
template <typename T1, typename T2> struct delimiters<std::pair<T1, T2>, char> { static const delimiters_values<char> values; };
template <typename T1, typename T2> const delimiters_values<char> delimiters<std::pair<T1, T2>, char>::values = { "(", ", ", ")" };
template <typename T1, typename T2> struct delimiters<::std::pair<T1, T2>, char> { static const delimiters_values<char> values; };
template <typename T1, typename T2> const delimiters_values<char> delimiters<::std::pair<T1, T2>, char>::values = { "(", ", ", ")" };
template <typename T1, typename T2> struct delimiters< ::std::pair<T1, T2>, wchar_t> { static const delimiters_values<wchar_t> values; };
template <typename T1, typename T2> const delimiters_values<wchar_t> delimiters< ::std::pair<T1, T2>, wchar_t>::values = { L"(", L", ", L")" };
template <typename ...Args> struct delimiters<std::tuple<Args...>, char> { static const delimiters_values<char> values; };
template <typename ...Args> const delimiters_values<char> delimiters<std::tuple<Args...>, char>::values = { "(", ", ", ")" };
template <typename ...Args> struct delimiters<::std::tuple<Args...>, char> { static const delimiters_values<char> values; };
template <typename ...Args> const delimiters_values<char> delimiters<::std::tuple<Args...>, char>::values = { "(", ", ", ")" };
template <typename ...Args> struct delimiters< ::std::tuple<Args...>, wchar_t> { static const delimiters_values<wchar_t> values; };
template <typename ...Args> const delimiters_values<wchar_t> delimiters< ::std::tuple<Args...>, wchar_t>::values = { L"(", L", ", L")" };
......
......@@ -11,7 +11,7 @@ namespace rlib {
return (int8_t)val;
else if constexpr(BytesCount == 2)
return (int16_t)val;
else return val;
return val;
}
}
......
......@@ -14,10 +14,8 @@ typedef struct {
#define EMPTY EX(inv)
/* Store the operand width into decoding.src, decoding.dest and decoding.src2.
 * A width of 0 selects the default: 2 when decoding.is_operand_size_16 is
 * set, otherwise 4. Any other value is stored unchanged.
 * The width is computed exactly once; the earlier duplicate assignment that
 * mutated the parameter first has been dropped. */
static inline void set_width(int width) {
  const int effective_width =
      (width != 0) ? width : (decoding.is_operand_size_16 ? 2 : 4);
  decoding.src.width = decoding.dest.width = decoding.src2.width = effective_width;
}
/* Instruction Decode and EXecute */
......@@ -211,7 +209,7 @@ namespace EHelperImpl {
idex(eip, &opcode_table[opcode]);
}
make_EHelper(real) {
__attribute__((hot)) make_EHelper(real) {
uint32_t opcode = instr_fetch(eip, 1);
decoding.opcode = opcode;
set_width(opcode_table[opcode].width);
......
......@@ -6,13 +6,16 @@
#include <signal.h>
#include <SDL2/SDL.h>
#include <thread>
#include <atomic>
#define TIMER_HZ 100
#define VGA_HZ 50
static uint64_t jiffy = 0;
static struct itimerval it;
static int device_update_flag = false;
static int update_screen_flag = false;
static std::atomic<bool> device_update_flag(false);
static std::atomic<bool> update_screen_flag(false);
void init_serial();
void init_timer();
......@@ -37,12 +40,9 @@ static void timer_sig_handler(int signum) {
Assert(ret == 0, "Can not set timer");
}
void device_update() {
if (!device_update_flag) {
return;
}
device_update_flag = false;
// Intentionally a no-op: an independent thread now does the device update work.
void device_update() {}
void device_update_impl() {
if (update_screen_flag) {
update_screen();
update_screen_flag = false;
......@@ -72,6 +72,16 @@ void device_update() {
}
}
// Endless polling loop; never returns.
// Every pass atomically reads-and-clears device_update_flag and, when the
// flag had been set, runs device_update_impl() once.
static void device_update_thread_daemon() {
  for (;;) {
    const bool update_requested = device_update_flag.exchange(false);
    if (update_requested) {
      device_update_impl();
    }
    // A 1 ms pause between polls caps the loop at roughly 1000 passes
    // (frames) per second.
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
  }
}
void sdl_clear_event_queue() {
SDL_Event event;
while (SDL_PollEvent(&event));
......@@ -93,6 +103,8 @@ void init_device() {
it.it_value.tv_usec = 1000000 / TIMER_HZ;
ret = setitimer(ITIMER_VIRTUAL, &it, NULL);
Assert(ret == 0, "Can not set timer");
std::thread(device_update_thread_daemon).detach();
}
#else
......
......@@ -33,9 +33,8 @@ void* add_mmio_map(paddr_t addr, int len, mmio_callback_t callback) {
}
/* bus interface */
int is_mmio(paddr_t addr) {
int i;
for (i = 0; i < nr_map; i ++) {
__attribute__((hot)) int is_mmio(paddr_t addr) {
for (int i = 0; i < nr_map; i ++) {
if (addr >= maps[i].low && addr <= maps[i].high) {
return i;
}
......
......@@ -18,16 +18,15 @@ static SDL_Texture *texture;
static uint32_t (*vmem) [SCREEN_W];
static uint32_t *screensize_port_base;
void update_screen() {
SDL_UpdateTexture(texture, NULL, vmem, SCREEN_W * sizeof(vmem[0][0]));
SDL_RenderClear(renderer);
SDL_RenderCopy(renderer, texture, NULL, NULL);
SDL_RenderPresent(renderer);
// Log a failed SDL call.
// A return code of 0 is treated as success and ignored; any other value is
// printed together with the text from SDL_GetError(). The error is only
// reported here, nothing else is done about it.
inline void SDL_ErrorCheck(int ret) {
  if (ret == 0) {
    return;
  }
  rlib::println("SDL_Error: ret=", ret, ", GETERR=", SDL_GetError());
}
void init_vga() {
SDL_Init(SDL_INIT_VIDEO);
SDL_CreateWindowAndRenderer(SCREEN_W * 2, SCREEN_H * 2, 0, &window, &renderer);
static void init_vga_impl() {
SDL_ErrorCheck(SDL_Init(SDL_INIT_VIDEO));
SDL_ErrorCheck(SDL_CreateWindowAndRenderer(SCREEN_W * 2, SCREEN_H * 2, 0, &window, &renderer));
SDL_SetWindowTitle(window, "NEMU");
texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888,
SDL_TEXTUREACCESS_STATIC, SCREEN_W, SCREEN_H);
......@@ -36,4 +35,20 @@ void init_vga() {
*screensize_port_base = ((SCREEN_W) << 16) | (SCREEN_H);
vmem = reinterpret_cast<decltype(vmem)>(add_mmio_map(VMEM, 0x80000, nullptr));
}
// Copy vmem into the SDL texture and present it in the window.
// The whole body is compiled out when DISABLE_MMIO is defined.
void update_screen() {
#ifndef DISABLE_MMIO
  // The window is created lazily on the first call, so the SDL setup in
  // init_vga_impl() runs on whichever thread calls update_screen().
  if (!window) {
    init_vga_impl();
  }

  // Byte length of one row of vmem.
  const auto row_pitch = SCREEN_W * sizeof(vmem[0][0]);

  SDL_ErrorCheck(SDL_UpdateTexture(texture, NULL, vmem, row_pitch));
  SDL_ErrorCheck(SDL_RenderClear(renderer));
  SDL_ErrorCheck(SDL_RenderCopy(renderer, texture, NULL, NULL));
  SDL_RenderPresent(renderer);
#endif
}
void init_vga() {
// Intentionally empty. Per the original author's note, the way SDL is designed
// means the VGA setup has to run on the thread that updates the screen, so
// nothing is done here on the main thread. update_screen() calls
// init_vga_impl() itself while `window` is still null.
}
#endif /* HAS_IOE */
#include "nemu.h"
#include "device/mmio.h"
#define PMEM_SIZE (128 * 1024 * 1024)
......@@ -11,14 +12,35 @@ uint8_t pmem[PMEM_SIZE];
/* Memory accessing interfaces */
uint32_t paddr_read(paddr_t addr, int len) {
return pmem_rw(addr, uint32_t) & (~0u >> ((4 - len) << 3));
// Read `len` bytes from physical address `addr`.
// Unless DISABLE_MMIO is defined, the address is first looked up with
// is_mmio(); a result other than -1 routes the read to mmio_read(). Otherwise
// a 32-bit value is loaded from pmem and masked with a table indexed by `len`,
// which has entries for len 0..4 only.
__attribute__((hot)) uint32_t paddr_read(paddr_t addr, int len) {
  static const uint32_t len_mask[] = {0, 0xff, 0xffff, 0xffffff, 0xffffffff};
#ifdef DISABLE_MMIO
  return pmem_rw(addr, uint32_t) & len_mask[len];
#else
  const auto mmio_id = is_mmio(addr);
  if (RLIB_MACRO_LIKELY(-1 == mmio_id)) {
    // Common case: plain memory.
    return pmem_rw(addr, uint32_t) & len_mask[len];
  }
  return mmio_read(addr, len, mmio_id);
#endif
}
// Write `len` bytes of `data` to physical address `addr`.
// Unless DISABLE_MMIO is defined, the address is first looked up with
// is_mmio(); a result other than -1 routes the write to mmio_write() and
// pmem is left untouched. Otherwise the first `len` bytes of `data`'s
// in-memory representation are copied to guest_to_host(addr).
// The copy happens exactly once and only on the plain-memory path; the
// earlier unconditional memcpy ahead of the MMIO check has been removed.
void paddr_write(paddr_t addr, uint32_t data, int len) {
#ifdef DISABLE_MMIO
  memcpy(guest_to_host(addr), &data, len);
#else
  const auto mmio_id = is_mmio(addr);
  if (RLIB_MACRO_LIKELY(-1 == mmio_id)) {
    // Common case: plain memory.
    memcpy(guest_to_host(addr), &data, len);
  } else {
    mmio_write(addr, len, data, mmio_id);
  }
#endif
}
// len is Bytes.
uint32_t vaddr_read(vaddr_t addr, int len) {
return paddr_read(addr, len);
......
......@@ -3,11 +3,15 @@
#include <amdev.h>
size_t input_read(uintptr_t reg, void *buf, size_t size) {
const uint32_t I8042_DATA_PORT = 0x60;
switch (reg) {
case _DEVREG_INPUT_KBD: {
_KbdReg *kbd = (_KbdReg *)buf;
kbd->keydown = 0;
kbd->keycode = _KEY_NONE;
uint32_t press = inl(I8042_DATA_PORT);
kbd->keycode = press;
if(press != _KEY_NONE){
kbd->keydown = !(kbd->keydown);
}
return sizeof(_KbdReg);
}
}
......
......@@ -6,11 +6,13 @@
static uint32_t* const fb __attribute__((used)) = (uint32_t *)0x40000;
size_t video_read(uintptr_t reg, void *buf, size_t size) {
const uint32_t SCREEN_PORT = 0x100;
switch (reg) {
case _DEVREG_VIDEO_INFO: {
_VideoInfoReg *info = (_VideoInfoReg *)buf;
info->width = 0;
info->height = 0;
uint32_t screen = inl(SCREEN_PORT);
info->width = screen >> 16;
info->height = screen << 16 >> 16;
return sizeof(_VideoInfoReg);
}
}
......@@ -21,7 +23,9 @@ size_t video_write(uintptr_t reg, void *buf, size_t size) {
switch (reg) {
case _DEVREG_VIDEO_FBCTL: {
_FBCtlReg *ctl = (_FBCtlReg *)buf;
for(int i = 0; i < ctl->h; ++i) {
memcpy(fb+(ctl->y+i)*screen_width()+ctl->x,ctl->pixels+i*ctl->w,ctl->w*4);
}
if (ctl->sync) {
// do nothing, hardware syncs.
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment