initial
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
# I/O placement and timing constraints for the ports CLK, LEDS[3:0], TXD, RXD
# and RESET. Every port uses the LVCMOS33 I/O standard and every pin is
# assigned through PACKAGE_PIN, so the file uses a single placement property.

# Clock input: 10 ns period (100 MHz)
set_property PACKAGE_PIN E3 [get_ports CLK]
set_property IOSTANDARD LVCMOS33 [get_ports CLK]
create_clock -period 10.0 [get_ports CLK]

# LEDs
set_property PACKAGE_PIN H5 [get_ports LEDS[0]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[0]]
set_property PACKAGE_PIN J5 [get_ports LEDS[1]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[1]]
set_property PACKAGE_PIN T9 [get_ports LEDS[2]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[2]]
set_property PACKAGE_PIN T10 [get_ports LEDS[3]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[3]]

# UART
set_property PACKAGE_PIN D10 [get_ports TXD]
set_property IOSTANDARD LVCMOS33 [get_ports TXD]
set_property PACKAGE_PIN A9 [get_ports RXD]
set_property IOSTANDARD LVCMOS33 [get_ports RXD]

# Reset button
set_property PACKAGE_PIN C2 [get_ports RESET]
set_property IOSTANDARD LVCMOS33 [get_ports RESET]
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
# I/O placement and timing constraints for the ports CLK, LEDS[4:0], TXD, RXD
# and RESET. Every port uses the LVCMOS33 I/O standard and every pin is
# assigned through PACKAGE_PIN, so the file uses a single placement property.

# Clock input: 83.33 ns period (about 12 MHz)
set_property PACKAGE_PIN L17 [get_ports CLK]
set_property IOSTANDARD LVCMOS33 [get_ports CLK]
create_clock -period 83.33 [get_ports CLK]

# LEDs
set_property PACKAGE_PIN A17 [get_ports LEDS[0]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[0]]
set_property PACKAGE_PIN C16 [get_ports LEDS[1]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[1]]
set_property PACKAGE_PIN B17 [get_ports LEDS[2]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[2]]
set_property PACKAGE_PIN B16 [get_ports LEDS[3]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[3]]
set_property PACKAGE_PIN C17 [get_ports LEDS[4]]
set_property IOSTANDARD LVCMOS33 [get_ports LEDS[4]]

# UART
set_property PACKAGE_PIN G17 [get_ports TXD]
set_property IOSTANDARD LVCMOS33 [get_ports TXD]
set_property PACKAGE_PIN G19 [get_ports RXD]
set_property IOSTANDARD LVCMOS33 [get_ports RXD]

# Reset button
set_property PACKAGE_PIN A18 [get_ports RESET]
set_property IOSTANDARD LVCMOS33 [get_ports RESET]
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
# LPF pin constraints for the ports CLK, RESET, LEDS[4:0], TXD and RXD.
# See https://github.com/emard/ulx3s/blob/master/doc/constraints/ulx3s_v20.lpf
# NOTE(review): the link above points at the ULX3S constraint file, while this
# file declares a 12 MHz clock - presumably kept as a format reference; confirm.

## Clock (12 MHz) ################################

LOCATE COMP "CLK" SITE "A10";
IOBUF  PORT "CLK" IO_TYPE=LVCMOS33;
FREQUENCY PORT "CLK" 12 MHZ;

## RESET button ##################################

LOCATE COMP "RESET" SITE "P4";
IOBUF  PORT "RESET" IO_TYPE=LVCMOS33;

## LEDs (one LOCATE/IOBUF pair per bit) ##########

LOCATE COMP "LEDS[0]" SITE "B17";
IOBUF  PORT "LEDS[0]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[1]" SITE "A17";
IOBUF  PORT "LEDS[1]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[2]" SITE "C17";
IOBUF  PORT "LEDS[2]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[3]" SITE "B18";
IOBUF  PORT "LEDS[3]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[4]" SITE "A18";
IOBUF  PORT "LEDS[4]" IO_TYPE=LVCMOS33;

## UART (both lines pulled up) ###################

LOCATE COMP "TXD" SITE "D11";
IOBUF  PORT "TXD" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
LOCATE COMP "RXD" SITE "D12";
IOBUF  PORT "RXD" PULLMODE=UP IO_TYPE=LVCMOS33;
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
# PCF pin assignments: one "set_io <port> <package pin>" entry per port.

# Clock input
set_io CLK      35

# LEDs
set_io LEDS[0]  27
set_io LEDS[1]  21
set_io LEDS[2]  25
set_io LEDS[3]  23
set_io LEDS[4]  26

# UART
set_io TXD      9
set_io RXD      6

# Reset button
set_io RESET    10
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
# PCF pin assignments: one "set_io <port> <package pin>" entry per port.

# Clock input
set_io CLK            21

# LEDs
set_io LEDS[0]        99
set_io LEDS[1]        98
set_io LEDS[2]        97
set_io LEDS[3]        96
set_io LEDS[4]        95

# UART
set_io TXD            8
set_io RXD            9

# SPI flash: clock and chip select
set_io SPIFLASH_CLK   70
set_io SPIFLASH_CS_N  71

# SPI flash data lines. Pins 67 and 68 are listed under two port spellings
# (MOSI/MISO and IO[0]/IO[1]); both spellings map to the same two pins.
set_io SPIFLASH_MOSI  67
set_io SPIFLASH_MISO  68

set_io SPIFLASH_IO[0] 67
set_io SPIFLASH_IO[1] 68

# Reset button
set_io RESET          47
|
||||
Executable
+18
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
# Synthesize, place-and-route, pack and load the design for the Arty board.
#
# Usage: <this script> <verilog sources>
#   $1 is handed to yosys unquoted, so it may hold several space-separated files.
#
# Steps: yosys -> nextpnr-xilinx -> fasm2frames -> xc7frames2bit -> openFPGALoader

# --- configuration ---
PROJECT_NAME=SOC
PART=xc7a35tcsg324-1
BOARD_FREQ=100
CPU_FREQ=100
VERILOGS=$1
DB_DIR=/usr/share/nextpnr/prjxray-db
CHIPDB_DIR=/usr/share/nextpnr/xilinx-chipdb

# From here on: stop at the first failing command and echo each command.
set -ex

# Synthesis (writes ${PROJECT_NAME}.json)
yosys -DARTY -DBOARD_FREQ=${BOARD_FREQ} -DCPU_FREQ=${CPU_FREQ} \
      -p "scratchpad -set xilinx_dsp.multonly 1" \
      -p "synth_xilinx -nowidelut -flatten -abc9 -arch xc7 -top ${PROJECT_NAME}; write_json ${PROJECT_NAME}.json" \
      ${VERILOGS}

# Place and route (writes ${PROJECT_NAME}.fasm)
nextpnr-xilinx --chipdb ${CHIPDB_DIR}/xc7a35t.bin \
               --xdc BOARDS/arty.xdc \
               --json ${PROJECT_NAME}.json \
               --write ${PROJECT_NAME}_routed.json \
               --fasm ${PROJECT_NAME}.fasm

# FASM -> frames -> bitstream
fasm2frames --part ${PART} --db-root ${DB_DIR}/artix7 ${PROJECT_NAME}.fasm > ${PROJECT_NAME}.frames
xc7frames2bit --part_file ${DB_DIR}/artix7/${PART}/part.yaml \
              --part_name ${PART} \
              --frm_file ${PROJECT_NAME}.frames \
              --output_file ${PROJECT_NAME}.bit

# Send to SRAM:
openFPGALoader --board arty ${PROJECT_NAME}.bit
# Send to FLASH instead:
#openFPGALoader --board arty -f ${PROJECT_NAME}.bit
|
||||
Executable
+18
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
# Synthesize, place-and-route, pack and load the design for the CMOD A7 board.
#
# Usage: <this script> <verilog sources>
#   $1 is handed to yosys unquoted, so it may hold several space-separated files.
#
# Steps: yosys -> nextpnr-xilinx -> fasm2frames -> xc7frames2bit -> openFPGALoader

PROJECT_NAME=SOC
DB_DIR=/usr/share/nextpnr/prjxray-db
CHIPDB_DIR=/usr/share/nextpnr/xilinx-chipdb
PART=xc7a35tcpg236-1
VERILOGS=$1
# NOTE(review): BOARDS/cmod_a7.xdc constrains CLK to an 83.33 ns period
# (about 12 MHz) while BOARD_FREQ is 100 here - confirm which one is intended.
BOARD_FREQ=100
CPU_FREQ=100

# From here on: stop at the first failing command and echo each command.
set -ex

# Synthesis (writes ${PROJECT_NAME}.json)
yosys -DCMODA7 -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "scratchpad -set xilinx_dsp.multonly 1" -p "synth_xilinx -nowidelut -flatten -abc9 -arch xc7 -top SOC; write_json ${PROJECT_NAME}.json" ${VERILOGS}

# Place and route (writes ${PROJECT_NAME}.fasm)
nextpnr-xilinx --chipdb ${CHIPDB_DIR}/xc7a35tcpg236-1.bin --xdc BOARDS/cmod_a7.xdc --json ${PROJECT_NAME}.json --write ${PROJECT_NAME}_routed.json --fasm ${PROJECT_NAME}.fasm

# FASM -> frames -> bitstream (writes ${PROJECT_NAME}.bit)
fasm2frames --part ${PART} --db-root ${DB_DIR}/artix7 ${PROJECT_NAME}.fasm > ${PROJECT_NAME}.frames
xc7frames2bit --part_file ${DB_DIR}/artix7/${PART}/part.yaml --part_name ${PART} --frm_file ${PROJECT_NAME}.frames --output_file ${PROJECT_NAME}.bit

# To send to SRAM: program the bitstream that was just generated above
# (previously this loaded a file named femtosoc.bit, which this script never
# produces).
openFPGALoader --freq 30e6 -c digilent --fpga-part xc7a35 ${PROJECT_NAME}.bit
# To send to FLASH:
# openFPGALoader --freq 30e6 -c digilent --fpga-part xc7a35tcpg236 -f ${PROJECT_NAME}.bit
|
||||
Executable
+13
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
# Synthesize, place-and-route, pack and program the design for the ecp5_evn
# board configuration.
#
# Usage: <this script> <verilog sources>
#   $1 is handed to yosys unquoted, so it may hold several space-separated files.
#
# Each step aborts the script (with that step's exit status) when it fails.

PROJECTNAME=SOC
BOARD=ecp5_evn
BOARD_FREQ=12
CPU_FREQ=100
FPGA_VARIANT=um5g-85k
FPGA_PACKAGE=CABGA381
VERILOGS=$1

# Synthesis (writes $PROJECTNAME.json)
yosys -q -DECP5_EVN -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ecp5 -abc9 -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit

# Place and route against BOARDS/$BOARD.lpf (writes ${PROJECTNAME}_out.config)
nextpnr-ecp5 --force --timing-allow-fail --json $PROJECTNAME.json --lpf BOARDS/$BOARD.lpf --textcfg $PROJECTNAME"_out".config --freq $BOARD_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE || exit

# Pack the bitstream (.bit) and an SVF file
ecppack --compress --svf-rowsize 100000 --svf $PROJECTNAME".svf" $PROJECTNAME"_out.config" $PROJECTNAME".bit" || exit

# Program the bitstream to flash
# NOTE(review): confirm that ujprog supports this board's programming interface.
ujprog -j FLASH $PROJECTNAME".bit" || exit
|
||||
|
||||
Executable
+30
@@ -0,0 +1,30 @@
|
||||
#!/bin/bash
# Build and load the design for the tangprimer20k board:
# yosys (synth_gowin) -> nextpnr-himbaechel -> gowin_pack -> openFPGALoader.
#
# Usage: <this script> <verilog sources>
#   $1 is expanded unquoted inside the yosys script (read_verilog).
#
# Every step exits the script with status 1 on failure.

# --- Settings ---
PROJECTNAME=SOC
BOARD='tangprimer20k'
DEVICE='GW2A-LV18PG256C8/I7'
BOARD_FREQ=27
CPU_FREQ=50
VERILOGS=$1

# --- Step 1: synthesis (writes ${PROJECTNAME}.json) ---
yosys -q -DPRIMER20K -DBOARD_FREQ=${BOARD_FREQ} -DCPU_FREQ=${CPU_FREQ} -D INV_BTN=0 -p "
read_verilog ${VERILOGS};
synth_gowin -top ${PROJECTNAME} -json ${PROJECTNAME}.json -family gw2a" || exit 1

# --- Step 2: place and route against BOARDS/${BOARD}.cst ---
nextpnr-himbaechel \
    --json "${PROJECTNAME}.json" \
    --write "${PROJECTNAME}_pnr.json" \
    --device "${DEVICE}" \
    --vopt "cst=BOARDS/${BOARD}.cst" \
    --vopt family=GW2A-18 \
    --freq "${BOARD_FREQ}" || exit 1

# --- Step 3: pack the routed design into a .fs bitstream ---
gowin_pack -d "${DEVICE}" -o "${PROJECTNAME}.fs" "${PROJECTNAME}_pnr.json" || exit 1

# --- Step 4: load the bitstream onto the board ---
openFPGALoader -b "${BOARD}" "${PROJECTNAME}.fs" || exit 1
|
||||
|
||||
Executable
+14
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
# Synthesize, place-and-route, pack and program the design for the icebreaker
# board configuration (yosys -> nextpnr-ice40 -> icetime -> icepack -> iceprog).
#
# Usage: <this script> <verilog sources>
#   $1 is handed to yosys unquoted, so it may hold several space-separated files.
#
# Every step except the icetime report aborts the script when it fails.

PROJECTNAME=SOC
BOARD=icebreaker
BOARD_FREQ=12
CPU_FREQ=20
FPGA_VARIANT=up5k
FPGA_PACKAGE=sg48
VERILOGS=$1

# Synthesis (writes $PROJECTNAME.json)
yosys -q -DICE_BREAKER -DNEGATIVE_RESET -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ice40 -abc9 -device u -dsp -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit

# Place and route against BOARDS/$BOARD.pcf (writes $PROJECTNAME.asc)
nextpnr-ice40 --force --json $PROJECTNAME.json --pcf BOARDS/$BOARD.pcf --asc $PROJECTNAME.asc --freq $BOARD_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE --pcf-allow-unconstrained || exit

# Timing report; the device comes from $FPGA_VARIANT so it cannot drift from
# the part passed to nextpnr. A failure here does not stop the script.
icetime -p BOARDS/$BOARD.pcf -P $FPGA_PACKAGE -r $PROJECTNAME.timings -d $FPGA_VARIANT -t $PROJECTNAME.asc

# Pack and program
icepack $PROJECTNAME.asc $PROJECTNAME.bin || exit
iceprog $PROJECTNAME.bin || exit
echo DONE.
|
||||
|
||||
Executable
+14
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
# Synthesize, place-and-route, pack and program the design for the icestick
# board configuration (yosys -> nextpnr-ice40 -> icetime -> icepack -> iceprog).
#
# Usage: <this script> <verilog sources>
#   $1 is handed to yosys unquoted, so it may hold several space-separated files.
#
# Every step except the icetime report aborts the script when it fails.

PROJECTNAME=SOC
BOARD=icestick
BOARD_FREQ=12
CPU_FREQ=45
FPGA_VARIANT=hx1k
FPGA_PACKAGE=tq144
VERILOGS=$1

# Synthesis (writes $PROJECTNAME.json)
yosys -q -DICE_STICK -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ice40 -relut -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit

# Place and route against BOARDS/$BOARD.pcf, targeting $CPU_FREQ MHz
# (writes $PROJECTNAME.asc)
nextpnr-ice40 --force --timing-allow-fail --json $PROJECTNAME.json --pcf BOARDS/$BOARD.pcf --asc $PROJECTNAME.asc --freq $CPU_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE --pcf-allow-unconstrained --opt-timing || exit

# Timing report; the device comes from $FPGA_VARIANT so it cannot drift from
# the part passed to nextpnr. A failure here does not stop the script.
icetime -p BOARDS/$BOARD.pcf -P $FPGA_PACKAGE -r $PROJECTNAME.timings -d $FPGA_VARIANT -t $PROJECTNAME.asc

# Pack and program
icepack $PROJECTNAME.asc $PROJECTNAME.bin || exit
iceprog $PROJECTNAME.bin || exit
echo DONE.
|
||||
|
||||
Executable
+9
@@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env bash
# Synthesize the design for the icestick board configuration and open it in
# the nextpnr-ice40 GUI (--gui). No bitstream is packed or programmed here.
#
# Usage: <this script> <verilog sources>
#   $1 is handed to yosys unquoted, so it may hold several space-separated files.

PROJECTNAME=SOC
BOARD=icestick
BOARD_FREQ=12
CPU_FREQ=45
FPGA_VARIANT=hx1k
FPGA_PACKAGE=tq144
VERILOGS=$1

# Synthesis (writes $PROJECTNAME.json)
yosys -q -DICE_STICK -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ice40 -relut -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit

# Place and route in the GUI, targeting $CPU_FREQ MHz
nextpnr-ice40 --gui --force --timing-allow-fail --json $PROJECTNAME.json --pcf BOARDS/$BOARD.pcf --asc $PROJECTNAME.asc --freq $CPU_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE --pcf-allow-unconstrained --opt-timing || exit
|
||||
Executable
+13
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
# Synthesize, place-and-route, pack and program the design for the ulx3s
# board configuration (yosys -> nextpnr-ecp5 -> ecppack -> ujprog).
#
# Usage: <this script> <verilog sources>
#   $1 is handed to yosys unquoted, so it may hold several space-separated files.
#
# Each step aborts the script (with that step's exit status) when it fails.

PROJECTNAME=SOC
BOARD=ulx3s
BOARD_FREQ=25
CPU_FREQ=100
FPGA_VARIANT=85k
FPGA_PACKAGE=CABGA381
VERILOGS=$1

# Synthesis (writes $PROJECTNAME.json)
yosys -q -DULX3S -DBOARD_FREQ=$BOARD_FREQ -DCPU_FREQ=$CPU_FREQ -p "synth_ecp5 -abc9 -top $PROJECTNAME -json $PROJECTNAME.json" $VERILOGS || exit

# Place and route against BOARDS/$BOARD.lpf (writes ${PROJECTNAME}_out.config)
nextpnr-ecp5 --force --timing-allow-fail --json $PROJECTNAME.json --lpf BOARDS/$BOARD.lpf --textcfg $PROJECTNAME"_out".config --freq $BOARD_FREQ --$FPGA_VARIANT --package $FPGA_PACKAGE || exit

# Pack the bitstream (.bit) and an SVF file
ecppack --compress --svf-rowsize 100000 --svf $PROJECTNAME".svf" $PROJECTNAME"_out.config" $PROJECTNAME".bit" || exit

# Program the bitstream to flash
ujprog -j FLASH $PROJECTNAME".bit" || exit
|
||||
|
||||
@@ -0,0 +1,143 @@
|
||||
// Gowin CST pin constraints. Each port gets an IO_LOC (pin) statement and,
// for most ports, an IO_PORT (electrical attributes) statement.
// Several port groups below reuse the same pins (for example C13 is assigned
// to led[0], LED_R, oser_out and div_led), so a single design can only use a
// compatible subset of these groups.
// NOTE(review): only TXD and RXD are spelled in upper case here; the clock,
// reset and LED entries are "clk", "rst_i" and "led[n]". Confirm these match
// the top-level port names of the design being built.

// Clock, key and reset inputs
IO_LOC "clk" H11;
IO_PORT "clk" IO_TYPE=LVCMOS33;
IO_LOC "key_i" T3;
IO_LOC "rst_i" T10;
IO_PORT "rst_i" IO_TYPE=LVCMOS33;

// Alternative clock input, located by I/O site name
IO_LOC "clk_i" IOT27A;

// LEDs 0-5
IO_LOC "led[0]" C13;
IO_PORT "led[0]" IO_TYPE=LVCMOS33;
IO_LOC "led[1]" A13;
IO_PORT "led[1]" IO_TYPE=LVCMOS33;
IO_LOC "led[2]" N16;
IO_PORT "led[2]" IO_TYPE=LVCMOS33;
IO_LOC "led[3]" N14;
IO_PORT "led[3]" IO_TYPE=LVCMOS33;
IO_LOC "led[4]" L14;
IO_PORT "led[4]" IO_TYPE=LVCMOS33;
IO_LOC "led[5]" L16;
IO_PORT "led[5]" IO_TYPE=LVCMOS33;

// UART, both lines pulled up
IO_LOC "TXD" A15;
IO_PORT "TXD" IO_TYPE=LVCMOS33 PULL_MODE=UP;
IO_LOC "RXD" D14;
IO_PORT "RXD" IO_TYPE=LVCMOS33 PULL_MODE=UP;

// fake (led[6]/led[7] share pins A15/D14 with TXD/RXD)
IO_LOC "led[6]" A15;
IO_PORT "led[6]" IO_TYPE=LVCMOS33 PULL_MODE=NONE;
IO_LOC "led[7]" D14;
IO_PORT "led[7]" IO_TYPE=LVCMOS33 PULL_MODE=NONE;

// LVDS25 pair "tlvds"
IO_LOC "tlvds_p" P6;
IO_PORT "tlvds_p" IO_TYPE=LVDS25 PULL_MODE=NONE;
IO_LOC "tlvds_n" T6;
IO_PORT "tlvds_n" IO_TYPE=LVDS25 PULL_MODE=NONE;

// LVDS25 pair "elvds"
IO_LOC "elvds_p" C12;
IO_PORT "elvds_p" IO_TYPE=LVDS25 PULL_MODE=NONE;
IO_LOC "elvds_n" B12;
IO_PORT "elvds_n" IO_TYPE=LVDS25 PULL_MODE=NONE;

// RGB LED (same pins as led[0..2])
IO_LOC "LED_R" C13;
IO_PORT "LED_R" IO_TYPE=LVCMOS33;
IO_LOC "LED_G" A13;
IO_PORT "LED_G" IO_TYPE=LVCMOS33;
IO_LOC "LED_B" N16;
IO_PORT "LED_B" IO_TYPE=LVCMOS33;

// oser
IO_LOC "oser_out" C13;
IO_PORT "oser_out" IO_TYPE=LVCMOS33;
IO_LOC "fclk_o" N16;
IO_PORT "fclk_o" IO_TYPE=LVCMOS33;
IO_LOC "pclk_o" N14;
IO_PORT "pclk_o" IO_TYPE=LVCMOS33;

// ides
IO_LOC "fclk_i" B13;
IO_PORT "fclk_i" IO_TYPE=LVCMOS33;
IO_LOC "data_i" C12;
IO_PORT "data_i" IO_TYPE=LVCMOS33;
IO_LOC "q_o[0]" P9;
IO_PORT "q_o[0]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[1]" E15;
IO_PORT "q_o[1]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[2]" T7;
IO_PORT "q_o[2]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[3]" R8;
IO_PORT "q_o[3]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[4]" T6;
IO_PORT "q_o[4]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[5]" P6;
IO_PORT "q_o[5]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[6]" T8;
IO_PORT "q_o[6]" IO_TYPE=LVCMOS33;
IO_LOC "q_o[7]" P8;
IO_PORT "q_o[7]" IO_TYPE=LVCMOS33;

// RGB LCD: clock, sync and data-enable
IO_LOC "LCD_CLK" R9;
IO_PORT "LCD_CLK" IO_TYPE=LVCMOS33;
IO_LOC "LCD_HYNC" A15;
IO_PORT "LCD_HYNC" IO_TYPE=LVCMOS33;
IO_LOC "LCD_SYNC" D14;
IO_PORT "LCD_SYNC" IO_TYPE=LVCMOS33;
IO_LOC "LCD_DEN" E15;
IO_PORT "LCD_DEN" IO_TYPE=LVCMOS33;
// RGB LCD: red channel (5 bits)
IO_LOC "LCD_R[0]" L9;
IO_PORT "LCD_R[0]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[1]" N8;
IO_PORT "LCD_R[1]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[2]" N9;
IO_PORT "LCD_R[2]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[3]" N7;
IO_PORT "LCD_R[3]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_R[4]" N6;
IO_PORT "LCD_R[4]" IO_TYPE=LVCMOS33;

// RGB LCD: green channel (6 bits)
IO_LOC "LCD_G[0]" D11;
IO_PORT "LCD_G[0]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[1]" A11;
IO_PORT "LCD_G[1]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[2]" B11;
IO_PORT "LCD_G[2]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[3]" P7;
IO_PORT "LCD_G[3]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[4]" R7;
IO_PORT "LCD_G[4]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_G[5]" D10;
IO_PORT "LCD_G[5]" IO_TYPE=LVCMOS33;

// RGB LCD: blue channel (5 bits)
IO_LOC "LCD_B[0]" B12;
IO_PORT "LCD_B[0]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[1]" C12;
IO_PORT "LCD_B[1]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[2]" B13;
IO_PORT "LCD_B[2]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[3]" A14;
IO_PORT "LCD_B[3]" IO_TYPE=LVCMOS33;
IO_LOC "LCD_B[4]" B14;
IO_PORT "LCD_B[4]" IO_TYPE=LVCMOS33;

// DVI (TMDS clock pair and three data pairs)
IO_LOC "tmds_clk_p" G16;
IO_PORT "tmds_clk_p" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_clk_n" H15;
IO_PORT "tmds_clk_n" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_p[0]" H14;
IO_PORT "tmds_d_p[0]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_n[0]" H16;
IO_PORT "tmds_d_n[0]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_p[1]" J15;
IO_PORT "tmds_d_p[1]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_n[1]" K16;
IO_PORT "tmds_d_n[1]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_p[2]" K14;
IO_PORT "tmds_d_p[2]" PULL_MODE=NONE DRIVE=3.5;
IO_LOC "tmds_d_n[2]" K15;
IO_PORT "tmds_d_n[2]" PULL_MODE=NONE DRIVE=3.5;

// Divider LED (same pin as led[0])
IO_LOC "div_led" C13;
IO_PORT "div_led" IO_TYPE=LVCMOS33;
|
||||
@@ -0,0 +1,35 @@
|
||||
# LPF pin constraints for the ports CLK, RESET, LEDS[4:0], TXD and RXD.
# See https://github.com/emard/ulx3s/blob/master/doc/constraints/ulx3s_v20.lpf

## Clock (25 MHz) ################################

LOCATE COMP "CLK" SITE "G2";
IOBUF  PORT "CLK" PULLMODE=NONE IO_TYPE=LVCMOS33;
FREQUENCY PORT "CLK" 25 MHZ;

## RESET button ##################################

LOCATE COMP "RESET" SITE "T1"; # fire 2
IOBUF  PORT "RESET" IO_TYPE=LVCMOS33;

## LEDs (one LOCATE/IOBUF pair per bit) ##########

LOCATE COMP "LEDS[0]" SITE "B2";
IOBUF  PORT "LEDS[0]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[1]" SITE "C2";
IOBUF  PORT "LEDS[1]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[2]" SITE "C1";
IOBUF  PORT "LEDS[2]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[3]" SITE "D2";
IOBUF  PORT "LEDS[3]" IO_TYPE=LVCMOS33;
LOCATE COMP "LEDS[4]" SITE "D1";
IOBUF  PORT "LEDS[4]" IO_TYPE=LVCMOS33;

## UART (both lines pulled up) ###################

LOCATE COMP "TXD" SITE "L4"; # FPGA transmits to ftdi
IOBUF  PORT "TXD" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;
LOCATE COMP "RXD" SITE "M1"; # FPGA receives from ftdi
IOBUF  PORT "RXD" PULLMODE=UP IO_TYPE=LVCMOS33;
|
||||
|
||||
@@ -0,0 +1,595 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/*
|
||||
Topic: Description
|
||||
Benchmark using a linked list.
|
||||
|
||||
Linked list is a common data structure used in many applications.
|
||||
|
||||
For our purposes, this will exercise the memory units of the processor.
|
||||
In particular, usage of the list pointers to find and alter data.
|
||||
|
||||
We are not using Malloc since some platforms do not support this
|
||||
library.
|
||||
|
||||
Instead, the memory block being passed in is used to create a list,
|
||||
and the benchmark takes care not to add more items than can be
|
||||
accommodated by the memory block. The porting layer will make sure
|
||||
that we have a valid memory block.
|
||||
|
||||
All operations are done in place, without using any extra memory.
|
||||
|
||||
The list itself contains list pointers and pointers to data items.
|
||||
Data items contain the following:
|
||||
|
||||
idx - An index that captures the initial order of the list.
|
||||
data - Variable data initialized based on the input parameters. The 16b
are divided as follows:
o Upper 8b are backup of original data.
o Bit 7 indicates if the lower 7 bits are to be used as is or calculated.
o Bits 0-2 indicate type of operation to perform to get a 7b value.
o Bits 3-6 provide input for the operation.
|
||||
|
||||
*/
|
||||
|
||||
/* local functions */
|
||||
|
||||
list_head *core_list_find(list_head *list, list_data *info);
|
||||
list_head *core_list_reverse(list_head *list);
|
||||
list_head *core_list_remove(list_head *item);
|
||||
list_head *core_list_undo_remove(list_head *item_removed,
|
||||
list_head *item_modified);
|
||||
list_head *core_list_insert_new(list_head * insert_point,
|
||||
list_data * info,
|
||||
list_head **memblock,
|
||||
list_data **datablock,
|
||||
list_head * memblock_end,
|
||||
list_data * datablock_end);
|
||||
typedef ee_s32 (*list_cmp)(list_data *a, list_data *b, core_results *res);
|
||||
list_head *core_list_mergesort(list_head * list,
|
||||
list_cmp cmp,
|
||||
core_results *res);
|
||||
|
||||
ee_s16
|
||||
calc_func(ee_s16 *pdata, core_results *res)
|
||||
{
|
||||
ee_s16 data = *pdata;
|
||||
ee_s16 retval;
|
||||
ee_u8 optype
|
||||
= (data >> 7)
|
||||
& 1; /* bit 7 indicates if the function result has been cached */
|
||||
if (optype) /* if cached, use cache */
|
||||
return (data & 0x007f);
|
||||
else
|
||||
{ /* otherwise calculate and cache the result */
|
||||
ee_s16 flag = data & 0x7; /* bits 0-2 is type of function to perform */
|
||||
ee_s16 dtype
|
||||
= ((data >> 3)
|
||||
& 0xf); /* bits 3-6 is specific data for the operation */
|
||||
dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
|
||||
switch (flag)
|
||||
{
|
||||
case 0:
|
||||
if (dtype < 0x22) /* set min period for bit corruption */
|
||||
dtype = 0x22;
|
||||
retval = core_bench_state(res->size,
|
||||
res->memblock[3],
|
||||
res->seed1,
|
||||
res->seed2,
|
||||
dtype,
|
||||
res->crc);
|
||||
if (res->crcstate == 0)
|
||||
res->crcstate = retval;
|
||||
break;
|
||||
case 1:
|
||||
retval = core_bench_matrix(&(res->mat), dtype, res->crc);
|
||||
if (res->crcmatrix == 0)
|
||||
res->crcmatrix = retval;
|
||||
break;
|
||||
default:
|
||||
retval = data;
|
||||
break;
|
||||
}
|
||||
res->crc = crcu16(retval, res->crc);
|
||||
retval &= 0x007f;
|
||||
*pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
|
||||
return retval;
|
||||
}
|
||||
}
|
||||
/* Function: cmp_complex
|
||||
Compare the data item in a list cell.
|
||||
|
||||
Can be used by mergesort.
|
||||
*/
|
||||
ee_s32
|
||||
cmp_complex(list_data *a, list_data *b, core_results *res)
|
||||
{
|
||||
ee_s16 val1 = calc_func(&(a->data16), res);
|
||||
ee_s16 val2 = calc_func(&(b->data16), res);
|
||||
return val1 - val2;
|
||||
}
|
||||
|
||||
/* Function: cmp_idx
|
||||
Compare the idx item in a list cell, and regen the data.
|
||||
|
||||
Can be used by mergesort.
|
||||
*/
|
||||
ee_s32
|
||||
cmp_idx(list_data *a, list_data *b, core_results *res)
|
||||
{
|
||||
if (res == NULL)
|
||||
{
|
||||
a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16 >> 8));
|
||||
b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16 >> 8));
|
||||
}
|
||||
return a->idx - b->idx;
|
||||
}
|
||||
|
||||
void
|
||||
copy_info(list_data *to, list_data *from)
|
||||
{
|
||||
to->data16 = from->data16;
|
||||
to->idx = from->idx;
|
||||
}
|
||||
|
||||
/* Benchmark for linked list:
|
||||
- Try to find multiple data items.
|
||||
- List sort
|
||||
- Operate on data from list (crc)
|
||||
- Single remove/reinsert
|
||||
* At the end of this function, the list is back to original state
|
||||
*/
|
||||
ee_u16
|
||||
core_bench_list(core_results *res, ee_s16 finder_idx)
|
||||
{
|
||||
ee_u16 retval = 0;
|
||||
ee_u16 found = 0, missed = 0;
|
||||
list_head *list = res->list;
|
||||
ee_s16 find_num = res->seed3;
|
||||
list_head *this_find;
|
||||
list_head *finder, *remover;
|
||||
list_data info;
|
||||
ee_s16 i;
|
||||
|
||||
info.idx = finder_idx;
|
||||
/* find <find_num> values in the list, and change the list each time
|
||||
* (reverse and cache if value found) */
|
||||
for (i = 0; i < find_num; i++)
|
||||
{
|
||||
info.data16 = (i & 0xff);
|
||||
this_find = core_list_find(list, &info);
|
||||
list = core_list_reverse(list);
|
||||
if (this_find == NULL)
|
||||
{
|
||||
missed++;
|
||||
retval += (list->next->info->data16 >> 8) & 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
found++;
|
||||
if (this_find->info->data16 & 0x1) /* use found value */
|
||||
retval += (this_find->info->data16 >> 9) & 1;
|
||||
/* and cache next item at the head of the list (if any) */
|
||||
if (this_find->next != NULL)
|
||||
{
|
||||
finder = this_find->next;
|
||||
this_find->next = finder->next;
|
||||
finder->next = list->next;
|
||||
list->next = finder;
|
||||
}
|
||||
}
|
||||
if (info.idx >= 0)
|
||||
info.idx++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List find %d: [%d,%d,%d]\n", i, retval, missed, found);
|
||||
#endif
|
||||
}
|
||||
retval += found * 4 - missed;
|
||||
/* sort the list by data content and remove one item*/
|
||||
if (finder_idx > 0)
|
||||
list = core_list_mergesort(list, cmp_complex, res);
|
||||
remover = core_list_remove(list->next);
|
||||
/* CRC data content of list from location of index N forward, and then undo
|
||||
* remove */
|
||||
finder = core_list_find(list, &info);
|
||||
if (!finder)
|
||||
finder = list->next;
|
||||
while (finder)
|
||||
{
|
||||
retval = crc16(list->info->data16, retval);
|
||||
finder = finder->next;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List sort 1: %04x\n", retval);
|
||||
#endif
|
||||
remover = core_list_undo_remove(remover, list->next);
|
||||
/* sort the list by index, in effect returning the list to original state */
|
||||
list = core_list_mergesort(list, cmp_idx, NULL);
|
||||
/* CRC data content of list */
|
||||
finder = list->next;
|
||||
while (finder)
|
||||
{
|
||||
retval = crc16(list->info->data16, retval);
|
||||
finder = finder->next;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List sort 2: %04x\n", retval);
|
||||
#endif
|
||||
return retval;
|
||||
}
|
||||
/* Function: core_list_init
|
||||
Initialize list with data.
|
||||
|
||||
Parameters:
|
||||
blksize - Size of memory to be initialized.
|
||||
memblock - Pointer to memory block.
|
||||
seed - Actual values chosen depend on the seed parameter.
|
||||
The seed parameter MUST be supplied from a source that cannot be
|
||||
determined at compile time
|
||||
|
||||
Returns:
|
||||
Pointer to the head of the list.
|
||||
|
||||
*/
|
||||
list_head *
|
||||
core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed)
|
||||
{
|
||||
/* calculated pointers for the list */
|
||||
ee_u32 per_item = 16 + sizeof(struct list_data_s);
|
||||
ee_u32 size = (blksize / per_item)
|
||||
- 2; /* to accommodate systems with 64b pointers, and make sure
|
||||
same code is executed, set max list elements */
|
||||
list_head *memblock_end = memblock + size;
|
||||
list_data *datablock = (list_data *)(memblock_end);
|
||||
list_data *datablock_end = datablock + size;
|
||||
/* some useful variables */
|
||||
ee_u32 i;
|
||||
list_head *finder, *list = memblock;
|
||||
list_data info;
|
||||
|
||||
/* create a fake items for the list head and tail */
|
||||
list->next = NULL;
|
||||
list->info = datablock;
|
||||
list->info->idx = 0x0000;
|
||||
list->info->data16 = (ee_s16)0x8080;
|
||||
memblock++;
|
||||
datablock++;
|
||||
info.idx = 0x7fff;
|
||||
info.data16 = (ee_s16)0xffff;
|
||||
core_list_insert_new(
|
||||
list, &info, &memblock, &datablock, memblock_end, datablock_end);
|
||||
|
||||
/* then insert size items */
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
ee_u16 datpat = ((ee_u16)(seed ^ i) & 0xf);
|
||||
ee_u16 dat
|
||||
= (datpat << 3) | (i & 0x7); /* alternate between algorithms */
|
||||
info.data16 = (dat << 8) | dat; /* fill the data with actual data and
|
||||
upper bits with rebuild value */
|
||||
core_list_insert_new(
|
||||
list, &info, &memblock, &datablock, memblock_end, datablock_end);
|
||||
}
|
||||
/* and now index the list so we know initial seed order of the list */
|
||||
finder = list->next;
|
||||
i = 1;
|
||||
while (finder->next != NULL)
|
||||
{
|
||||
if (i < size / 5) /* first 20% of the list in order */
|
||||
finder->info->idx = i++;
|
||||
else
|
||||
{
|
||||
ee_u16 pat = (ee_u16)(i++ ^ seed); /* get a pseudo random number */
|
||||
finder->info->idx = 0x3fff
|
||||
& (((i & 0x07) << 8)
|
||||
| pat); /* make sure the mixed items end up
|
||||
after the ones in sequence */
|
||||
}
|
||||
finder = finder->next;
|
||||
}
|
||||
list = core_list_mergesort(list, cmp_idx, NULL);
|
||||
#if CORE_DEBUG
|
||||
ee_printf("Initialized list:\n");
|
||||
finder = list;
|
||||
while (finder)
|
||||
{
|
||||
ee_printf(
|
||||
"[%04x,%04x]", finder->info->idx, (ee_u16)finder->info->data16);
|
||||
finder = finder->next;
|
||||
}
|
||||
ee_printf("\n");
|
||||
#endif
|
||||
return list;
|
||||
}
|
||||
|
||||
/* Function: core_list_insert
|
||||
Insert an item to the list
|
||||
|
||||
Parameters:
|
||||
insert_point - where to insert the item.
|
||||
info - data for the cell.
|
||||
memblock - pointer for the list header
|
||||
datablock - pointer for the list data
|
||||
memblock_end - end of region for list headers
|
||||
datablock_end - end of region for list data
|
||||
|
||||
Returns:
|
||||
Pointer to new item.
|
||||
*/
|
||||
list_head *
|
||||
core_list_insert_new(list_head * insert_point,
|
||||
list_data * info,
|
||||
list_head **memblock,
|
||||
list_data **datablock,
|
||||
list_head * memblock_end,
|
||||
list_data * datablock_end)
|
||||
{
|
||||
list_head *newitem;
|
||||
|
||||
if ((*memblock + 1) >= memblock_end)
|
||||
return NULL;
|
||||
if ((*datablock + 1) >= datablock_end)
|
||||
return NULL;
|
||||
|
||||
newitem = *memblock;
|
||||
(*memblock)++;
|
||||
newitem->next = insert_point->next;
|
||||
insert_point->next = newitem;
|
||||
|
||||
newitem->info = *datablock;
|
||||
(*datablock)++;
|
||||
copy_info(newitem->info, info);
|
||||
|
||||
return newitem;
|
||||
}
|
||||
|
||||
/* Function: core_list_remove
|
||||
Remove an item from the list.
|
||||
|
||||
Operation:
|
||||
For a singly linked list, remove by copying the data from the next item
|
||||
over to the current cell, and unlinking the next item.
|
||||
|
||||
Note:
|
||||
since there is always a fake item at the end of the list, no need to
|
||||
check for NULL.
|
||||
|
||||
Returns:
|
||||
Removed item.
|
||||
*/
|
||||
list_head *
core_list_remove(list_head *item)
{
    /* The cell that physically leaves the list is the successor of item. */
    list_head *victim = item->next;
    list_data *kept   = item->info;

    /* Exchange the data pointers: item now carries the successor's data,
       and the detached cell carries the data that was logically removed. */
    item->info   = victim->info;
    victim->info = kept;

    /* Unlink the successor and terminate it. */
    item->next   = victim->next;
    victim->next = NULL;

    return victim;
}
|
||||
|
||||
/* Function: core_list_undo_remove
|
||||
Undo a remove operation.
|
||||
|
||||
Operation:
|
||||
Since we want each iteration of the benchmark to be exactly the same,
|
||||
we need to be able to undo a remove.
|
||||
Link the removed item back into the list, and switch the info items.
|
||||
|
||||
Parameters:
|
||||
item_removed - Return value from the <core_list_remove>
|
||||
item_modified - List item that was modified during <core_list_remove>
|
||||
|
||||
Returns:
|
||||
The item that was linked back to the list.
|
||||
|
||||
*/
|
||||
list_head *
core_list_undo_remove(list_head *item_removed, list_head *item_modified)
{
    /* Swap the data pointers back: the mirror image of the exchange done
       in core_list_remove. */
    list_data *restored = item_removed->info;

    item_removed->info  = item_modified->info;
    item_modified->info = restored;

    /* Re-link the removed cell directly after the modified one. */
    item_removed->next  = item_modified->next;
    item_modified->next = item_removed;

    return item_removed;
}
|
||||
|
||||
/* Function: core_list_find
|
||||
Find an item in the list
|
||||
|
||||
Operation:
|
||||
Find an item by idx (if idx >= 0) or by a specific data value
|
||||
|
||||
Parameters:
|
||||
list - list head
|
||||
info - idx or data to find
|
||||
|
||||
Returns:
|
||||
Found item, or NULL if not found.
|
||||
*/
|
||||
list_head *
|
||||
core_list_find(list_head *list, list_data *info)
|
||||
{
|
||||
if (info->idx >= 0)
|
||||
{
|
||||
while (list && (list->info->idx != info->idx))
|
||||
list = list->next;
|
||||
return list;
|
||||
}
|
||||
else
|
||||
{
|
||||
while (list && ((list->info->data16 & 0xff) != info->data16))
|
||||
list = list->next;
|
||||
return list;
|
||||
}
|
||||
}
|
||||
/* Function: core_list_reverse
|
||||
Reverse a list
|
||||
|
||||
Operation:
|
||||
Rearrange the pointers so the list is reversed.
|
||||
|
||||
Parameters:
|
||||
list - list head

Returns:
New head of the reversed list.
|
||||
*/
|
||||
|
||||
list_head *
core_list_reverse(list_head *list)
{
    /* Head of the already-reversed prefix; grows by one cell per pass. */
    list_head *reversed = NULL;

    while (list != NULL)
    {
        list_head *rest = list->next; /* remember the unreversed remainder */

        list->next = reversed; /* point the current cell backwards */
        reversed   = list;
        list       = rest;
    }

    /* The old tail is the new head (NULL for an empty list). */
    return reversed;
}
|
||||
/* Function: core_list_mergesort
|
||||
Sort the list in place without recursion.
|
||||
|
||||
Description:
|
||||
Use mergesort, as for linked list this is a realistic solution.
|
||||
Also, since this is aimed at embedded, care was taken to use iterative
|
||||
rather than a recursive algorithm. The sort can either return the list to
|
||||
original order (by idx), or use the data item to invoke other
|
||||
algorithms and change the order of the list.
|
||||
|
||||
Parameters:
|
||||
list - list to be sorted.
|
||||
cmp - cmp function to use
|
||||
|
||||
Returns:
|
||||
New head of the list.
|
||||
|
||||
Note:
|
||||
We have a special header for the list that will always be first,
|
||||
but the algorithm could theoretically modify where the list starts.
|
||||
|
||||
*/
|
||||
list_head *
|
||||
core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
|
||||
{
|
||||
list_head *p, *q, *e, *tail;
|
||||
ee_s32 insize, nmerges, psize, qsize, i;
|
||||
|
||||
insize = 1;
|
||||
|
||||
while (1)
|
||||
{
|
||||
p = list;
|
||||
list = NULL;
|
||||
tail = NULL;
|
||||
|
||||
nmerges = 0; /* count number of merges we do in this pass */
|
||||
|
||||
while (p)
|
||||
{
|
||||
nmerges++; /* there exists a merge to be done */
|
||||
/* step `insize' places along from p */
|
||||
q = p;
|
||||
psize = 0;
|
||||
for (i = 0; i < insize; i++)
|
||||
{
|
||||
psize++;
|
||||
q = q->next;
|
||||
if (!q)
|
||||
break;
|
||||
}
|
||||
|
||||
/* if q hasn't fallen off end, we have two lists to merge */
|
||||
qsize = insize;
|
||||
|
||||
/* now we have two lists; merge them */
|
||||
while (psize > 0 || (qsize > 0 && q))
|
||||
{
|
||||
|
||||
/* decide whether next element of merge comes from p or q */
|
||||
if (psize == 0)
|
||||
{
|
||||
/* p is empty; e must come from q. */
|
||||
e = q;
|
||||
q = q->next;
|
||||
qsize--;
|
||||
}
|
||||
else if (qsize == 0 || !q)
|
||||
{
|
||||
/* q is empty; e must come from p. */
|
||||
e = p;
|
||||
p = p->next;
|
||||
psize--;
|
||||
}
|
||||
else if (cmp(p->info, q->info, res) <= 0)
|
||||
{
|
||||
/* First element of p is lower (or same); e must come from
|
||||
* p. */
|
||||
e = p;
|
||||
p = p->next;
|
||||
psize--;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First element of q is lower; e must come from q. */
|
||||
e = q;
|
||||
q = q->next;
|
||||
qsize--;
|
||||
}
|
||||
|
||||
/* add the next element to the merged list */
|
||||
if (tail)
|
||||
{
|
||||
tail->next = e;
|
||||
}
|
||||
else
|
||||
{
|
||||
list = e;
|
||||
}
|
||||
tail = e;
|
||||
}
|
||||
|
||||
/* now p has stepped `insize' places along, and q has too */
|
||||
p = q;
|
||||
}
|
||||
|
||||
tail->next = NULL;
|
||||
|
||||
/* If we have done only one merge, we're finished. */
|
||||
if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
|
||||
return list;
|
||||
|
||||
/* Otherwise repeat, merging lists twice the size */
|
||||
insize *= 2;
|
||||
}
|
||||
#if COMPILER_REQUIRES_SORT_RETURN
|
||||
return list;
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,451 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* File: core_main.c
|
||||
This file contains the framework to acquire a block of memory, seed
|
||||
initial parameters, run the benchmark and report the results.
|
||||
*/
|
||||
#include "coremark.h"
|
||||
|
||||
/* Function: iterate
|
||||
Run the benchmark for a specified number of iterations.
|
||||
|
||||
Operation:
|
||||
For each type of benchmarked algorithm:
|
||||
a - Initialize the data block for the algorithm.
|
||||
b - Execute the algorithm N times.
|
||||
|
||||
Returns:
|
||||
NULL.
|
||||
*/
|
||||
static ee_u16 list_known_crc[] = { (ee_u16)0xd4b0,
|
||||
(ee_u16)0x3340,
|
||||
(ee_u16)0x6a79,
|
||||
(ee_u16)0xe714,
|
||||
(ee_u16)0xe3c1 };
|
||||
static ee_u16 matrix_known_crc[] = { (ee_u16)0xbe52,
|
||||
(ee_u16)0x1199,
|
||||
(ee_u16)0x5608,
|
||||
(ee_u16)0x1fd7,
|
||||
(ee_u16)0x0747 };
|
||||
static ee_u16 state_known_crc[] = { (ee_u16)0x5e47,
|
||||
(ee_u16)0x39bf,
|
||||
(ee_u16)0xe5a4,
|
||||
(ee_u16)0x8e3a,
|
||||
(ee_u16)0x8d84 };
|
||||
void *
|
||||
iterate(void *pres)
|
||||
{
|
||||
ee_u32 i;
|
||||
ee_u16 crc;
|
||||
core_results *res = (core_results *)pres;
|
||||
ee_u32 iterations = res->iterations;
|
||||
res->crc = 0;
|
||||
res->crclist = 0;
|
||||
res->crcmatrix = 0;
|
||||
res->crcstate = 0;
|
||||
|
||||
for (i = 0; i < iterations; i++)
|
||||
{
|
||||
crc = core_bench_list(res, 1);
|
||||
res->crc = crcu16(crc, res->crc);
|
||||
crc = core_bench_list(res, -1);
|
||||
res->crc = crcu16(crc, res->crc);
|
||||
if (i == 0)
|
||||
res->crclist = res->crc;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if (SEED_METHOD == SEED_ARG)
|
||||
ee_s32 get_seed_args(int i, int argc, char *argv[]);
|
||||
#define get_seed(x) (ee_s16) get_seed_args(x, argc, argv)
|
||||
#define get_seed_32(x) get_seed_args(x, argc, argv)
|
||||
#else /* via function or volatile */
|
||||
ee_s32 get_seed_32(int i);
|
||||
#define get_seed(x) (ee_s16) get_seed_32(x)
|
||||
#endif
|
||||
|
||||
#if (MEM_METHOD == MEM_STATIC)
|
||||
ee_u8 static_memblk[TOTAL_DATA_SIZE];
|
||||
#endif
|
||||
char *mem_name[3] = { "Static", "Heap", "Stack" };
|
||||
/* Function: main
|
||||
Main entry routine for the benchmark.
|
||||
This function is responsible for the following steps:
|
||||
|
||||
1 - Initialize input seeds from a source that cannot be determined at
compile time.
2 - Initialize memory block for use.
3 - Run and time the benchmark.
4 - Report results, testing the validity of the output if the seeds are
known.

Arguments:
1 - first seed : Any value
2 - second seed : Must be identical to first for iterations to be identical
3 - third seed : Any value, should be at least an order of magnitude less
than the input size, but bigger than 32.
4 - Iterations : Special, if set to 0, iterations will be automatically
determined such that the benchmark will run between 10 to 100 secs
|
||||
|
||||
*/
|
||||
|
||||
#if MAIN_HAS_NOARGC
|
||||
MAIN_RETURN_TYPE
|
||||
main(void)
|
||||
{
|
||||
int argc = 0;
|
||||
char *argv[1];
|
||||
#else
|
||||
MAIN_RETURN_TYPE
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
#endif
|
||||
ee_u16 i, j = 0, num_algorithms = 0;
|
||||
ee_s16 known_id = -1, total_errors = 0;
|
||||
ee_u16 seedcrc = 0;
|
||||
CORE_TICKS total_time;
|
||||
core_results results[MULTITHREAD];
|
||||
#if (MEM_METHOD == MEM_STACK)
|
||||
ee_u8 stack_memblock[TOTAL_DATA_SIZE * MULTITHREAD];
|
||||
#endif
|
||||
/* first call any initializations needed */
|
||||
portable_init(&(results[0].port), &argc, argv);
|
||||
/* First some checks to make sure benchmark will run ok */
|
||||
if (sizeof(struct list_head_s) > 128)
|
||||
{
|
||||
ee_printf("list_head structure too big for comparable data!\n");
|
||||
return MAIN_RETURN_VAL;
|
||||
}
|
||||
results[0].seed1 = get_seed(1);
|
||||
results[0].seed2 = get_seed(2);
|
||||
results[0].seed3 = get_seed(3);
|
||||
results[0].iterations = get_seed_32(4);
|
||||
#if CORE_DEBUG
|
||||
results[0].iterations = 1;
|
||||
#endif
|
||||
results[0].execs = get_seed_32(5);
|
||||
if (results[0].execs == 0)
|
||||
{ /* if not supplied, execute all algorithms */
|
||||
results[0].execs = ALL_ALGORITHMS_MASK;
|
||||
}
|
||||
/* put in some default values based on one seed only for easy testing */
|
||||
if ((results[0].seed1 == 0) && (results[0].seed2 == 0)
|
||||
&& (results[0].seed3 == 0))
|
||||
{ /* performance run */
|
||||
results[0].seed1 = 0;
|
||||
results[0].seed2 = 0;
|
||||
results[0].seed3 = 0x66;
|
||||
}
|
||||
if ((results[0].seed1 == 1) && (results[0].seed2 == 0)
|
||||
&& (results[0].seed3 == 0))
|
||||
{ /* validation run */
|
||||
results[0].seed1 = 0x3415;
|
||||
results[0].seed2 = 0x3415;
|
||||
results[0].seed3 = 0x66;
|
||||
}
|
||||
#if (MEM_METHOD == MEM_STATIC)
|
||||
results[0].memblock[0] = (void *)static_memblk;
|
||||
results[0].size = TOTAL_DATA_SIZE;
|
||||
results[0].err = 0;
|
||||
#if (MULTITHREAD > 1)
|
||||
#error "Cannot use a static data area with multiple contexts!"
|
||||
#endif
|
||||
#elif (MEM_METHOD == MEM_MALLOC)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
ee_s32 malloc_override = get_seed(7);
|
||||
if (malloc_override != 0)
|
||||
results[i].size = malloc_override;
|
||||
else
|
||||
results[i].size = TOTAL_DATA_SIZE;
|
||||
results[i].memblock[0] = portable_malloc(results[i].size);
|
||||
results[i].seed1 = results[0].seed1;
|
||||
results[i].seed2 = results[0].seed2;
|
||||
results[i].seed3 = results[0].seed3;
|
||||
results[i].err = 0;
|
||||
results[i].execs = results[0].execs;
|
||||
}
|
||||
#elif (MEM_METHOD == MEM_STACK)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
results[i].memblock[0] = stack_memblock + i * TOTAL_DATA_SIZE;
|
||||
results[i].size = TOTAL_DATA_SIZE;
|
||||
results[i].seed1 = results[0].seed1;
|
||||
results[i].seed2 = results[0].seed2;
|
||||
results[i].seed3 = results[0].seed3;
|
||||
results[i].err = 0;
|
||||
results[i].execs = results[0].execs;
|
||||
}
|
||||
#else
|
||||
#error "Please define a way to initialize a memory block."
|
||||
#endif
|
||||
/* Data init */
|
||||
/* Find out how space much we have based on number of algorithms */
|
||||
for (i = 0; i < NUM_ALGORITHMS; i++)
|
||||
{
|
||||
if ((1 << (ee_u32)i) & results[0].execs)
|
||||
num_algorithms++;
|
||||
}
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
results[i].size = results[i].size / num_algorithms;
|
||||
/* Assign pointers */
|
||||
for (i = 0; i < NUM_ALGORITHMS; i++)
|
||||
{
|
||||
ee_u32 ctx;
|
||||
if ((1 << (ee_u32)i) & results[0].execs)
|
||||
{
|
||||
for (ctx = 0; ctx < MULTITHREAD; ctx++)
|
||||
results[ctx].memblock[i + 1]
|
||||
= (char *)(results[ctx].memblock[0]) + results[0].size * j;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
/* call inits */
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
{
|
||||
if (results[i].execs & ID_LIST)
|
||||
{
|
||||
results[i].list = core_list_init(
|
||||
results[0].size, results[i].memblock[1], results[i].seed1);
|
||||
}
|
||||
if (results[i].execs & ID_MATRIX)
|
||||
{
|
||||
core_init_matrix(results[0].size,
|
||||
results[i].memblock[2],
|
||||
(ee_s32)results[i].seed1
|
||||
| (((ee_s32)results[i].seed2) << 16),
|
||||
&(results[i].mat));
|
||||
}
|
||||
if (results[i].execs & ID_STATE)
|
||||
{
|
||||
core_init_state(
|
||||
results[0].size, results[i].seed1, results[i].memblock[3]);
|
||||
}
|
||||
}
|
||||
|
||||
/* automatically determine number of iterations if not set */
|
||||
if (results[0].iterations == 0)
|
||||
{
|
||||
secs_ret secs_passed = 0;
|
||||
ee_u32 divisor;
|
||||
results[0].iterations = 1;
|
||||
while (secs_passed < (secs_ret)1)
|
||||
{
|
||||
results[0].iterations *= 10;
|
||||
start_time();
|
||||
iterate(&results[0]);
|
||||
stop_time();
|
||||
secs_passed = time_in_secs(get_time());
|
||||
}
|
||||
/* now we know it executes for at least 1 sec, set actual run time at
|
||||
* about 10 secs */
|
||||
divisor = (ee_u32)secs_passed;
|
||||
if (divisor == 0) /* some machines cast float to int as 0 since this
|
||||
conversion is not defined by ANSI, but we know at
|
||||
least one second passed */
|
||||
divisor = 1;
|
||||
results[0].iterations *= 1 + 10 / divisor;
|
||||
}
|
||||
/* perform actual benchmark */
|
||||
start_time();
|
||||
#if (MULTITHREAD > 1)
|
||||
if (default_num_contexts > MULTITHREAD)
|
||||
{
|
||||
default_num_contexts = MULTITHREAD;
|
||||
}
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
results[i].iterations = results[0].iterations;
|
||||
results[i].execs = results[0].execs;
|
||||
core_start_parallel(&results[i]);
|
||||
}
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
core_stop_parallel(&results[i]);
|
||||
}
|
||||
#else
|
||||
iterate(&results[0]);
|
||||
#endif
|
||||
stop_time();
|
||||
total_time = get_time();
|
||||
/* get a function of the input to report */
|
||||
seedcrc = crc16(results[0].seed1, seedcrc);
|
||||
seedcrc = crc16(results[0].seed2, seedcrc);
|
||||
seedcrc = crc16(results[0].seed3, seedcrc);
|
||||
seedcrc = crc16(results[0].size, seedcrc);
|
||||
|
||||
switch (seedcrc)
|
||||
{ /* test known output for common seeds */
|
||||
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
|
||||
known_id = 0;
|
||||
ee_printf("6k performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per
|
||||
algorithm */
|
||||
known_id = 1;
|
||||
ee_printf("6k validation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm
|
||||
*/
|
||||
known_id = 2;
|
||||
ee_printf("Profile generation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
|
||||
known_id = 3;
|
||||
ee_printf("2K performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per
|
||||
algorithm */
|
||||
known_id = 4;
|
||||
ee_printf("2K validation run parameters for coremark.\n");
|
||||
break;
|
||||
default:
|
||||
total_errors = -1;
|
||||
break;
|
||||
}
|
||||
if (known_id >= 0)
|
||||
{
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
{
|
||||
results[i].err = 0;
|
||||
if ((results[i].execs & ID_LIST)
|
||||
&& (results[i].crclist != list_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crclist,
|
||||
list_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_MATRIX)
|
||||
&& (results[i].crcmatrix != matrix_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crcmatrix,
|
||||
matrix_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_STATE)
|
||||
&& (results[i].crcstate != state_known_crc[known_id]))
|
||||
{
|
||||
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",
|
||||
i,
|
||||
results[i].crcstate,
|
||||
state_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
total_errors += results[i].err;
|
||||
}
|
||||
}
|
||||
total_errors += check_data_types();
|
||||
/* and report results */
|
||||
ee_printf("CoreMark Size : %lu\n", (long unsigned)results[0].size);
|
||||
ee_printf("Total ticks : %lu\n", (long unsigned)total_time);
|
||||
#if HAS_FLOAT
|
||||
ee_printf("Total time (secs): %f\n", time_in_secs(total_time));
|
||||
if (time_in_secs(total_time) > 0)
|
||||
ee_printf("Iterations/Sec : %f\n",
|
||||
(default_num_contexts * results[0].iterations)
|
||||
/ time_in_secs(total_time));
|
||||
#else
|
||||
/*
|
||||
ee_printf("Total time (secs): %d\n", time_in_secs(total_time));
|
||||
if (time_in_secs(total_time) > 0)
|
||||
ee_printf("Iterations/Sec : %d\n",
|
||||
default_num_contexts * results[0].iterations
|
||||
/ time_in_secs(total_time));
|
||||
*/
|
||||
#endif
|
||||
|
||||
|
||||
print_coremarks(total_time);
|
||||
|
||||
if (time_in_secs(total_time) < 10)
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR! Must execute for at least 10 secs for a valid result!\n");
|
||||
// total_errors++;
|
||||
}
|
||||
|
||||
/*
|
||||
ee_printf("Iterations : %lu\n",
|
||||
(long unsigned)default_num_contexts * results[0].iterations);
|
||||
ee_printf("Compiler version : %s\n", COMPILER_VERSION);
|
||||
ee_printf("Compiler flags : %s\n", COMPILER_FLAGS);
|
||||
*/
|
||||
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_printf("Parallel %s : %d\n", PARALLEL_METHOD, default_num_contexts);
|
||||
#endif
|
||||
ee_printf("Memory location : %s\n", MEM_LOCATION);
|
||||
/* output for verification */
|
||||
ee_printf("seedcrc : 0x%04x\n", seedcrc);
|
||||
if (results[0].execs & ID_LIST)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crclist : 0x%04x\n", i, results[i].crclist);
|
||||
if (results[0].execs & ID_MATRIX)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcmatrix : 0x%04x\n", i, results[i].crcmatrix);
|
||||
if (results[0].execs & ID_STATE)
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcstate : 0x%04x\n", i, results[i].crcstate);
|
||||
for (i = 0; i < default_num_contexts; i++)
|
||||
ee_printf("[%d]crcfinal : 0x%04x\n", i, results[i].crc);
|
||||
if (total_errors == 0)
|
||||
{
|
||||
ee_printf(
|
||||
"Correct operation validated. See README.md for run and reporting "
|
||||
"rules.\n");
|
||||
#if HAS_FLOAT
|
||||
if (known_id == 3)
|
||||
{
|
||||
ee_printf("CoreMark 1.0 : %f / %s %s",
|
||||
default_num_contexts * results[0].iterations
|
||||
/ time_in_secs(total_time),
|
||||
COMPILER_VERSION,
|
||||
COMPILER_FLAGS);
|
||||
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
|
||||
ee_printf(" / %s", MEM_LOCATION);
|
||||
#else
|
||||
ee_printf(" / %s", mem_name[MEM_METHOD]);
|
||||
#endif
|
||||
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_printf(" / %d:%s", default_num_contexts, PARALLEL_METHOD);
|
||||
#endif
|
||||
ee_printf("\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (total_errors > 0)
|
||||
ee_printf("Errors detected\n");
|
||||
if (total_errors < 0)
|
||||
ee_printf(
|
||||
"Cannot validate operation for these seed values, please compare "
|
||||
"with results on a known platform.\n");
|
||||
|
||||
#if (MEM_METHOD == MEM_MALLOC)
|
||||
for (i = 0; i < MULTITHREAD; i++)
|
||||
portable_free(results[i].memblock[0]);
|
||||
#endif
|
||||
/* And last call any target specific code for finalizing */
|
||||
portable_fini(&(results[0].port));
|
||||
|
||||
return MAIN_RETURN_VAL;
|
||||
}
|
||||
@@ -0,0 +1,359 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/*
|
||||
Topic: Description
|
||||
Matrix manipulation benchmark
|
||||
|
||||
This very simple algorithm forms the basis of many more complex
|
||||
algorithms.
|
||||
|
||||
The tight inner loop is the focus of many optimizations (compiler as
|
||||
well as hardware based) and is thus relevant for embedded processing.
|
||||
|
||||
The total available data space will be divided to 3 parts:
|
||||
NxN Matrix A - initialized with small values (upper 3/4 of the bits all
zero).
NxN Matrix B - initialized with medium values (upper half of the bits all
zero).
NxN Matrix C - used for the result.
|
||||
|
||||
The actual values for A and B must be derived based on input that is not
|
||||
available at compile time.
|
||||
*/
|
||||
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val);
|
||||
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval);
|
||||
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val);
|
||||
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
|
||||
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
|
||||
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
|
||||
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val);
|
||||
|
||||
#define matrix_test_next(x) (x + 1)
|
||||
#define matrix_clip(x, y) ((y) ? (x)&0x0ff : (x)&0x0ffff)
|
||||
#define matrix_big(x) (0xf000 | (x))
|
||||
#define bit_extract(x, from, to) (((x) >> (from)) & (~(0xffffffff << (to))))
|
||||
|
||||
#if CORE_DEBUG
|
||||
void
|
||||
printmat(MATDAT *A, ee_u32 N, char *name)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
if (j != 0)
|
||||
ee_printf(",");
|
||||
ee_printf("%d", A[i * N + j]);
|
||||
}
|
||||
ee_printf("\n");
|
||||
}
|
||||
}
|
||||
void
|
||||
printmatC(MATRES *C, ee_u32 N, char *name)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
if (j != 0)
|
||||
ee_printf(",");
|
||||
ee_printf("%d", C[i * N + j]);
|
||||
}
|
||||
ee_printf("\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/* Function: core_bench_matrix
|
||||
Benchmark function
|
||||
|
||||
Iterate <matrix_test> N times,
|
||||
changing the matrix values slightly by a constant amount each time.
|
||||
*/
|
||||
ee_u16
|
||||
core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc)
|
||||
{
|
||||
ee_u32 N = p->N;
|
||||
MATRES *C = p->C;
|
||||
MATDAT *A = p->A;
|
||||
MATDAT *B = p->B;
|
||||
MATDAT val = (MATDAT)seed;
|
||||
|
||||
crc = crc16(matrix_test(N, C, A, B, val), crc);
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Function: matrix_test
|
||||
Perform matrix manipulation.
|
||||
|
||||
Parameters:
|
||||
N - Dimensions of the matrix.
|
||||
C - memory for result matrix.
|
||||
A - input matrix
|
||||
B - operator matrix (not changed during operations)
|
||||
|
||||
Returns:
|
||||
A CRC value that captures all results calculated in the function.
|
||||
In particular, crc of the value calculated on the result matrix
|
||||
after each step by <matrix_sum>.
|
||||
|
||||
Operation:
|
||||
|
||||
1 - Add a constant value to all elements of a matrix.
|
||||
2 - Multiply a matrix by a constant.
|
||||
3 - Multiply a matrix by a vector.
|
||||
4 - Multiply a matrix by a matrix.
|
||||
5 - Add a constant value to all elements of a matrix.
|
||||
|
||||
After the last step, matrix A is back to original contents.
|
||||
*/
|
||||
ee_s16
|
||||
matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val)
|
||||
{
|
||||
ee_u16 crc = 0;
|
||||
MATDAT clipval = matrix_big(val);
|
||||
|
||||
matrix_add_const(N, A, val); /* make sure data changes */
|
||||
#if CORE_DEBUG
|
||||
printmat(A, N, "matrix_add_const");
|
||||
#endif
|
||||
matrix_mul_const(N, C, A, val);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_const");
|
||||
#endif
|
||||
matrix_mul_vect(N, C, A, B);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_vect");
|
||||
#endif
|
||||
matrix_mul_matrix(N, C, A, B);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_matrix");
|
||||
#endif
|
||||
matrix_mul_matrix_bitextract(N, C, A, B);
|
||||
crc = crc16(matrix_sum(N, C, clipval), crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C, N, "matrix_mul_matrix_bitextract");
|
||||
#endif
|
||||
|
||||
matrix_add_const(N, A, -val); /* return matrix to initial value */
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Function: core_init_matrix
|
||||
Initialize the memory block for matrix benchmarking.
|
||||
|
||||
Parameters:
|
||||
blksize - Size of memory to be initialized.
|
||||
memblk - Pointer to memory block.
|
||||
seed - Actual values chosen depend on the seed parameter.
|
||||
p - pointers to <mat_params> containing initialized matrixes.
|
||||
|
||||
Returns:
|
||||
Matrix dimensions.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be
|
||||
determined at compile time
|
||||
*/
|
||||
ee_u32
|
||||
core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p)
|
||||
{
|
||||
ee_u32 N = 0;
|
||||
MATDAT *A;
|
||||
MATDAT *B;
|
||||
ee_s32 order = 1;
|
||||
MATDAT val;
|
||||
ee_u32 i = 0, j = 0;
|
||||
if (seed == 0)
|
||||
seed = 1;
|
||||
while (j < blksize)
|
||||
{
|
||||
i++;
|
||||
j = i * i * 2 * 4;
|
||||
}
|
||||
N = i - 1;
|
||||
A = (MATDAT *)align_mem(memblk);
|
||||
B = A + N * N;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
seed = ((order * seed) % 65536);
|
||||
val = (seed + order);
|
||||
val = matrix_clip(val, 0);
|
||||
B[i * N + j] = val;
|
||||
val = (val + order);
|
||||
val = matrix_clip(val, 1);
|
||||
A[i * N + j] = val;
|
||||
order++;
|
||||
}
|
||||
}
|
||||
|
||||
p->A = A;
|
||||
p->B = B;
|
||||
p->C = (MATRES *)align_mem(B + N * N);
|
||||
p->N = N;
|
||||
#if CORE_DEBUG
|
||||
printmat(A, N, "A");
|
||||
printmat(B, N, "B");
|
||||
#endif
|
||||
return N;
|
||||
}
|
||||
|
||||
/* Function: matrix_sum
|
||||
Calculate a function that depends on the values of elements in the
|
||||
matrix.
|
||||
|
||||
For each element, accumulate into a temporary variable.
|
||||
|
||||
As long as this value is under the parameter clipval,
|
||||
add 1 to the result if the element is bigger than the previous.
|
||||
|
||||
Otherwise, reset the accumulator and add 10 to the result.
|
||||
*/
|
||||
ee_s16
|
||||
matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval)
|
||||
{
|
||||
MATRES tmp = 0, prev = 0, cur = 0;
|
||||
ee_s16 ret = 0;
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
cur = C[i * N + j];
|
||||
tmp += cur;
|
||||
if (tmp > clipval)
|
||||
{
|
||||
ret += 10;
|
||||
tmp = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret += (cur > prev) ? 1 : 0;
|
||||
}
|
||||
prev = cur;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_const
|
||||
Multiply a matrix by a constant.
|
||||
This could be used as a scaler for instance.
|
||||
*/
|
||||
void
|
||||
matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i * N + j] = (MATRES)A[i * N + j] * (MATRES)val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_add_const
|
||||
Add a constant value to all elements of a matrix.
|
||||
*/
|
||||
void
|
||||
matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
A[i * N + j] += val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_vect
|
||||
Multiply a matrix by a vector.
|
||||
This is common in many simple filters (e.g. fir where a vector of
|
||||
coefficients is applied to the matrix.)
|
||||
*/
|
||||
void
|
||||
matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
|
||||
{
|
||||
ee_u32 i, j;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
C[i] = 0;
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i] += (MATRES)A[i * N + j] * (MATRES)B[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_matrix
|
||||
Multiply a matrix by a matrix.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as
|
||||
scaling.
|
||||
*/
|
||||
void
|
||||
matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
|
||||
{
|
||||
ee_u32 i, j, k;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i * N + j] = 0;
|
||||
for (k = 0; k < N; k++)
|
||||
{
|
||||
C[i * N + j] += (MATRES)A[i * N + k] * (MATRES)B[k * N + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_matrix_bitextract
|
||||
Multiply a matrix by a matrix, and extract some bits from the result.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as
|
||||
scaling.
|
||||
*/
|
||||
void
|
||||
matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
|
||||
{
|
||||
ee_u32 i, j, k;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
C[i * N + j] = 0;
|
||||
for (k = 0; k < N; k++)
|
||||
{
|
||||
MATRES tmp = (MATRES)A[i * N + k] * (MATRES)B[k * N + j];
|
||||
C[i * N + j] += bit_extract(tmp, 2, 4) * bit_extract(tmp, 5, 7);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,215 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
#include <io.h>
|
||||
#include <stdio.h>
|
||||
#include "coremark.h"
|
||||
#include "core_portme.h"
|
||||
#include <perf.h>
|
||||
|
||||
#if VALIDATION_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x3415;
|
||||
volatile ee_s32 seed2_volatile = 0x3415;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PERFORMANCE_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x0;
|
||||
volatile ee_s32 seed2_volatile = 0x0;
|
||||
volatile ee_s32 seed3_volatile = 0x66;
|
||||
#endif
|
||||
#if PROFILE_RUN
|
||||
volatile ee_s32 seed1_volatile = 0x8;
|
||||
volatile ee_s32 seed2_volatile = 0x8;
|
||||
volatile ee_s32 seed3_volatile = 0x8;
|
||||
#endif
|
||||
volatile ee_s32 seed4_volatile = ITERATIONS;
|
||||
volatile ee_s32 seed5_volatile = 0;
|
||||
|
||||
/* Porting : Timing functions
|
||||
How to capture time and convert to seconds must be ported to whatever is
|
||||
supported by the platform. e.g. Read value from on board RTC, read value from
|
||||
cpu clock cycles performance counter etc. Sample implementation for standard
|
||||
time.h and windows.h definitions included.
|
||||
*/
|
||||
CORETIMETYPE barebones_clock()
|
||||
{
|
||||
return (CORETIMETYPE)(rdcycle());
|
||||
}
|
||||
|
||||
/* Define : TIMER_RES_DIVIDER
|
||||
Divider to trade off timer resolution and total time that can be
|
||||
measured.
|
||||
|
||||
Use lower values to increase resolution, but make sure that overflow
|
||||
does not occur. If there are issues with the return value overflowing,
|
||||
increase this value.
|
||||
*/
|
||||
#define CLOCKS_PER_SEC 10000000
|
||||
#define GETMYTIME(_t) (*_t = barebones_clock())
|
||||
#define MYTIMEDIFF(fin, ini) ((fin) - (ini))
|
||||
#define TIMER_RES_DIVIDER 1
|
||||
#define SAMPLE_TIME_IMPLEMENTATION 1
|
||||
#define EE_TICKS_PER_SEC (CLOCKS_PER_SEC / TIMER_RES_DIVIDER)
|
||||
|
||||
/** Define Host specific (POSIX), or target specific global time variables. */
|
||||
static CORETIMETYPE start_time_val, stop_time_val;
|
||||
|
||||
/* Function : start_time
|
||||
This function will be called right before starting the timed portion of
|
||||
the benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or zeroing some system parameters - e.g. setting the cpu clocks
|
||||
cycles to 0.
|
||||
*/
|
||||
void
|
||||
start_time(void)
|
||||
{
|
||||
GETMYTIME(&start_time_val);
|
||||
}
|
||||
/* Function : stop_time
|
||||
This function will be called right after ending the timed portion of the
|
||||
benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the
|
||||
example code) or other system parameters - e.g. reading the current value of
|
||||
cpu cycles counter.
|
||||
*/
|
||||
void
|
||||
stop_time(void)
|
||||
{
|
||||
GETMYTIME(&stop_time_val);
|
||||
}
|
||||
/* Function : get_time
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
|
||||
Actual value returned may be cpu cycles, milliseconds or any other
|
||||
value, as long as it can be converted to seconds by <time_in_secs>. This
|
||||
methodology is taken to accommodate any hardware or simulated platform. The
|
||||
port here returns the difference of two rdcycle() readings; the ticks-per-second rate is
|
||||
controlled by <TIMER_RES_DIVIDER>
|
||||
*/
|
||||
CORE_TICKS
|
||||
get_time(void)
|
||||
{
|
||||
CORE_TICKS elapsed
|
||||
= (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
||||
return elapsed;
|
||||
}
|
||||
/* Function : time_in_secs
|
||||
Convert the value returned by get_time to seconds.
|
||||
|
||||
The <secs_ret> type is used to accommodate systems with no support for
|
||||
floating point. Default implementation implemented by the EE_TICKS_PER_SEC
|
||||
macro above.
|
||||
*/
|
||||
secs_ret
|
||||
time_in_secs(CORE_TICKS ticks)
|
||||
{
|
||||
secs_ret retval = ((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
|
||||
return retval;
|
||||
}
|
||||
|
||||
ee_u32 default_num_contexts = 1;
|
||||
|
||||
/* Function : portable_init
|
||||
Target specific initialization code
|
||||
Test for some common mistakes.
|
||||
*/
|
||||
void
|
||||
portable_init(core_portable *p, int *argc, char *argv[])
|
||||
{
|
||||
//usleep(100);
|
||||
//io.led = 0xF;
|
||||
|
||||
// ee_printf("board: %s (id=%d)\n",board_name(io.board_id),io.board_id);
|
||||
ee_printf("build: %s for %s\n",BUILD,ARCH);
|
||||
|
||||
// ee_printf("core%d: ", io.core_id); // core id
|
||||
// ee_printf("darkriscv@%dMHz with: ",io.board_cm*2); // board clock MHz
|
||||
// ee_printf("rv32%s ", check4rv32i()?"i":"e"); // architecture
|
||||
ee_printf("\n");
|
||||
// ee_printf("uart0: 115200 bps (div=%d)\n",io.uart.baud);
|
||||
// ee_printf("timr0: frequency=%dHz (io.timer=%d)\n",(io.board_cm*2000000u)/(io.timer+1),io.timer);
|
||||
|
||||
ee_printf("\n\n");
|
||||
|
||||
// ee_printf("CoreMark start in %d us.\n",io.timeus);
|
||||
|
||||
// #error "Call board initialization routines in portable init (if needed), in particular initialize UART!\n"
|
||||
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *))
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR! Please define ee_ptr_int to a type that holds a "
|
||||
"pointer!\n");
|
||||
}
|
||||
if (sizeof(ee_u32) != 4)
|
||||
{
|
||||
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
|
||||
}
|
||||
p->portable_id = 1;
|
||||
}
|
||||
|
||||
|
||||
// Print "fixed point" number (integer/1000)
|
||||
void printk(uint64_t kx) {
    /* kx is a value scaled by 1000; print it as "<whole>.<3-digit fraction>". */
    const int whole = (int)(kx / 1000);
    const int milli = (int)(kx % 1000);
    int threshold;

    printf("%d.",whole);

    /* Left-pad the fraction with zeros by hand (one "0" per missing digit)
       instead of relying on a width specifier in the format string. */
    for(threshold = 100; threshold >= 10; threshold /= 10) {
        if(milli < threshold) {
            printf("0");
        }
    }
    printf("%d",milli);
}
|
||||
|
||||
|
||||
void print_coremarks(uint64_t ticks) {
|
||||
const uint64_t MHz = CLOCKS_PER_SEC/1000000;
|
||||
// printf("*** MHz : %d\n",(int)MHz);
|
||||
printf("*** Ticks : %d\n",(int)ticks);
|
||||
uint64_t ksecs=ticks/(CLOCKS_PER_SEC/1000);
|
||||
// printf("*** Time : "); printk(ksecs); printf("\n");
|
||||
uint64_t kiter_per_sec= (uint64_t)(ITERATIONS*1000*1000)/ksecs;
|
||||
// printf("*** Iter/s : "); printk(kiter_per_sec); printf("\n");
|
||||
printf("*** Coremark/MHz : "); printk(kiter_per_sec/MHz); printf("\n");
|
||||
|
||||
uint64_t kticks2 = rdcycle() * (uint64_t)1000;
|
||||
uint64_t instret2 = rdinstret();
|
||||
printf("*** CPI (2) : "); printk(kticks2/instret2); printf("\n");
|
||||
}
|
||||
|
||||
/* Function : portable_fini
|
||||
Target specific final code
|
||||
*/
|
||||
void
|
||||
portable_fini(core_portable *p)
|
||||
{
|
||||
//io.led = 0;
|
||||
//ee_printf("CoreMark finish in %d us.\n\n",io.timeus);
|
||||
p->portable_id = 0;
|
||||
|
||||
// makes no sense return here!
|
||||
|
||||
//while(1)
|
||||
//{
|
||||
// usleep(500000);
|
||||
// io.led++;
|
||||
//}
|
||||
}
|
||||
@@ -0,0 +1,225 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#define ITERATIONS 300
|
||||
#define BUILD "testbench"
|
||||
#define ARCH "petituyau"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* Topic : Description
|
||||
This file contains configuration constants required to execute on
|
||||
different platforms
|
||||
*/
|
||||
#ifndef CORE_PORTME_H
|
||||
#define CORE_PORTME_H
|
||||
/************************/
|
||||
/* Data types and settings */
|
||||
/************************/
|
||||
/* Configuration : HAS_FLOAT
|
||||
Define to 1 if the platform supports floating point.
|
||||
*/
|
||||
#ifndef HAS_FLOAT
|
||||
#define HAS_FLOAT 0
|
||||
#endif
|
||||
/* Configuration : HAS_TIME_H
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef HAS_TIME_H
|
||||
#define HAS_TIME_H 0
|
||||
#endif
|
||||
/* Configuration : USE_CLOCK
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef USE_CLOCK
|
||||
#define USE_CLOCK 0
|
||||
#endif
|
||||
/* Configuration : HAS_STDIO
|
||||
Define to 1 if the platform has stdio.h.
|
||||
*/
|
||||
#ifndef HAS_STDIO
|
||||
#define HAS_STDIO 0
|
||||
#endif
|
||||
/* Configuration : HAS_PRINTF
|
||||
Define to 1 if the platform has stdio.h and implements the printf
|
||||
function.
|
||||
*/
|
||||
#ifndef HAS_PRINTF
|
||||
#define HAS_PRINTF 0
|
||||
#endif
|
||||
|
||||
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
|
||||
Initialize these strings per platform
|
||||
*/
|
||||
#ifndef COMPILER_VERSION
|
||||
#ifdef __GNUC__
|
||||
#define COMPILER_VERSION "GCC"__VERSION__
|
||||
#else
|
||||
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
|
||||
#endif
|
||||
#endif
|
||||
#ifndef COMPILER_FLAGS
|
||||
#define COMPILER_FLAGS "-O2"
|
||||
#endif
|
||||
#ifndef MEM_LOCATION
|
||||
#define MEM_LOCATION "STACK"
|
||||
#endif
|
||||
|
||||
/* Data Types :
|
||||
To avoid compiler issues, define the data types that need to be used for
|
||||
8b, 16b and 32b in <core_portme.h>.
|
||||
|
||||
*Important* :
|
||||
ee_ptr_int needs to be the data type used to hold pointers, otherwise
|
||||
coremark may fail!!!
|
||||
*/
|
||||
typedef signed short ee_s16;
|
||||
typedef unsigned short ee_u16;
|
||||
typedef signed int ee_s32;
|
||||
typedef double ee_f32;
|
||||
typedef unsigned char ee_u8;
|
||||
typedef unsigned int ee_u32;
|
||||
typedef ee_u32 ee_ptr_int;
|
||||
typedef size_t ee_size_t;
|
||||
#define NULL ((void *)0)
|
||||
/* align_mem :
|
||||
This macro is used to align an offset to point to a 32b value. It is
|
||||
used in the Matrix algorithm to initialize the input memory blocks.
|
||||
*/
|
||||
#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3))
|
||||
|
||||
/* Configuration : CORE_TICKS
|
||||
Define type of return from the timing functions.
|
||||
*/
|
||||
//#define CORETIMETYPE ee_u32
|
||||
//typedef ee_u32 CORE_TICKS;
|
||||
|
||||
#define CORETIMETYPE uint64_t
|
||||
typedef uint64_t CORE_TICKS;
|
||||
|
||||
|
||||
/* Configuration : SEED_METHOD
|
||||
Defines method to get seed values that cannot be computed at compile
|
||||
time.
|
||||
|
||||
Valid values :
|
||||
SEED_ARG - from command line.
|
||||
SEED_FUNC - from a system function.
|
||||
SEED_VOLATILE - from volatile variables.
|
||||
*/
|
||||
#ifndef SEED_METHOD
|
||||
#define SEED_METHOD SEED_VOLATILE
|
||||
#endif
|
||||
|
||||
/* Configuration : MEM_METHOD
|
||||
Defines method to get a block of memory.
|
||||
|
||||
Valid values :
|
||||
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
|
||||
MEM_STATIC - to use a static memory array.
|
||||
MEM_STACK - to allocate the data block on the stack (NYI).
|
||||
*/
|
||||
#ifndef MEM_METHOD
|
||||
#define MEM_METHOD MEM_STACK
|
||||
#endif
|
||||
|
||||
/* Configuration : MULTITHREAD
|
||||
Define for parallel execution
|
||||
|
||||
Valid values :
|
||||
1 - only one context (default).
|
||||
N>1 - will execute N copies in parallel.
|
||||
|
||||
Note :
|
||||
If this flag is defined to more than 1, an implementation for launching
|
||||
parallel contexts must be defined.
|
||||
|
||||
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK>
|
||||
to enable them.
|
||||
|
||||
It is valid to have a different implementation of <core_start_parallel>
|
||||
and <core_end_parallel> in <core_portme.c>, to fit a particular architecture.
|
||||
*/
|
||||
#ifndef MULTITHREAD
|
||||
#define MULTITHREAD 1
|
||||
#define USE_PTHREAD 0
|
||||
#define USE_FORK 0
|
||||
#define USE_SOCKET 0
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NOARGC
|
||||
Needed if platform does not support getting arguments to main.
|
||||
|
||||
Valid values :
|
||||
0 - argc/argv to main is supported
|
||||
1 - argc/argv to main is not supported
|
||||
|
||||
Note :
|
||||
This flag only matters if MULTITHREAD has been defined to a value
|
||||
greater than 1.
|
||||
*/
|
||||
#ifndef MAIN_HAS_NOARGC
|
||||
#define MAIN_HAS_NOARGC 1
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NORETURN
|
||||
Needed if platform does not support returning a value from main.
|
||||
|
||||
Valid values :
|
||||
0 - main returns an int, and return value will be 0.
|
||||
1 - platform does not support returning a value from main
|
||||
*/
|
||||
#ifndef MAIN_HAS_NORETURN
|
||||
#define MAIN_HAS_NORETURN 0
|
||||
#endif
|
||||
|
||||
/* Variable : default_num_contexts
|
||||
Not used for this simple port, must contain the value 1.
|
||||
*/
|
||||
extern ee_u32 default_num_contexts;
|
||||
|
||||
typedef struct CORE_PORTABLE_S
|
||||
{
|
||||
ee_u8 portable_id;
|
||||
} core_portable;
|
||||
|
||||
/* target specific init/fini */
|
||||
void portable_init(core_portable *p, int *argc, char *argv[]);
|
||||
void portable_fini(core_portable *p);
|
||||
|
||||
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) \
|
||||
&& !defined(VALIDATION_RUN)
|
||||
#if (TOTAL_DATA_SIZE == 1200)
|
||||
#define PROFILE_RUN 1
|
||||
#elif (TOTAL_DATA_SIZE == 2000)
|
||||
#define PERFORMANCE_RUN 1
|
||||
#else
|
||||
#define VALIDATION_RUN 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int ee_printf(const char *fmt, ...);
|
||||
void print_coremarks(uint64_t ticks);
|
||||
|
||||
#endif /* CORE_PORTME_H */
|
||||
|
||||
@@ -0,0 +1,330 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/* local functions */
|
||||
enum CORE_STATE core_state_transition(ee_u8 **instr, ee_u32 *transition_count);
|
||||
|
||||
/*
|
||||
Topic: Description
|
||||
Simple state machines like this one are used in many embedded products.
|
||||
|
||||
For more complex state machines, sometimes a state transition table
|
||||
implementation is used instead, trading speed of direct coding for ease of
|
||||
maintenance.
|
||||
|
||||
Since the main goal of using a state machine in CoreMark is to exercise
|
||||
the switch/if behaviour, we are using a small moore machine.
|
||||
|
||||
In particular, this machine tests type of string input,
|
||||
trying to determine whether the input is a number or something else.
|
||||
(see core_state.png).
|
||||
*/
|
||||
|
||||
/* Function: core_bench_state
|
||||
Benchmark function
|
||||
|
||||
Go over the input twice, once direct, and once after introducing some
|
||||
corruption.
|
||||
*/
|
||||
ee_u16
|
||||
core_bench_state(ee_u32 blksize,
|
||||
ee_u8 *memblock,
|
||||
ee_s16 seed1,
|
||||
ee_s16 seed2,
|
||||
ee_s16 step,
|
||||
ee_u16 crc)
|
||||
{
|
||||
ee_u32 final_counts[NUM_CORE_STATES];
|
||||
ee_u32 track_counts[NUM_CORE_STATES];
|
||||
ee_u8 *p = memblock;
|
||||
ee_u32 i;
|
||||
|
||||
#if CORE_DEBUG
|
||||
ee_printf("State Bench: %d,%d,%d,%04x\n", seed1, seed2, step, crc);
|
||||
#endif
|
||||
for (i = 0; i < NUM_CORE_STATES; i++)
|
||||
{
|
||||
final_counts[i] = track_counts[i] = 0;
|
||||
}
|
||||
/* run the state machine over the input */
|
||||
while (*p != 0)
|
||||
{
|
||||
enum CORE_STATE fstate = core_state_transition(&p, track_counts);
|
||||
final_counts[fstate]++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("%d,", fstate);
|
||||
}
|
||||
ee_printf("\n");
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
p = memblock;
|
||||
while (p < (memblock + blksize))
|
||||
{ /* insert some corruption */
|
||||
if (*p != ',')
|
||||
*p ^= (ee_u8)seed1;
|
||||
p += step;
|
||||
}
|
||||
p = memblock;
|
||||
/* run the state machine over the input again */
|
||||
while (*p != 0)
|
||||
{
|
||||
enum CORE_STATE fstate = core_state_transition(&p, track_counts);
|
||||
final_counts[fstate]++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("%d,", fstate);
|
||||
}
|
||||
ee_printf("\n");
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
p = memblock;
|
||||
while (p < (memblock + blksize))
|
||||
{ /* undo corruption is seed1 and seed2 are equal */
|
||||
if (*p != ',')
|
||||
*p ^= (ee_u8)seed2;
|
||||
p += step;
|
||||
}
|
||||
/* end timing */
|
||||
for (i = 0; i < NUM_CORE_STATES; i++)
|
||||
{
|
||||
crc = crcu32(final_counts[i], crc);
|
||||
crc = crcu32(track_counts[i], crc);
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Default initialization patterns */
|
||||
static ee_u8 *intpat[4]
|
||||
= { (ee_u8 *)"5012", (ee_u8 *)"1234", (ee_u8 *)"-874", (ee_u8 *)"+122" };
|
||||
static ee_u8 *floatpat[4] = { (ee_u8 *)"35.54400",
|
||||
(ee_u8 *)".1234500",
|
||||
(ee_u8 *)"-110.700",
|
||||
(ee_u8 *)"+0.64400" };
|
||||
static ee_u8 *scipat[4] = { (ee_u8 *)"5.500e+3",
|
||||
(ee_u8 *)"-.123e-2",
|
||||
(ee_u8 *)"-87e+832",
|
||||
(ee_u8 *)"+0.6e-12" };
|
||||
static ee_u8 *errpat[4] = { (ee_u8 *)"T0.3e-1F",
|
||||
(ee_u8 *)"-T.T++Tq",
|
||||
(ee_u8 *)"1T3.4e4z",
|
||||
(ee_u8 *)"34.0e-T^" };
|
||||
|
||||
/* Function: core_init_state
|
||||
Initialize the input data for the state machine.
|
||||
|
||||
Populate the input with several predetermined strings, interspersed.
|
||||
Actual patterns chosen depend on the seed parameter.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be
|
||||
determined at compile time
|
||||
*/
|
||||
void
|
||||
core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p)
|
||||
{
|
||||
ee_u32 total = 0, next = 0, i;
|
||||
ee_u8 *buf = 0;
|
||||
#if CORE_DEBUG
|
||||
ee_u8 *start = p;
|
||||
ee_printf("State: %d,%d\n", size, seed);
|
||||
#endif
|
||||
size--;
|
||||
next = 0;
|
||||
while ((total + next + 1) < size)
|
||||
{
|
||||
if (next > 0)
|
||||
{
|
||||
for (i = 0; i < next; i++)
|
||||
*(p + total + i) = buf[i];
|
||||
*(p + total + i) = ',';
|
||||
total += next + 1;
|
||||
}
|
||||
seed++;
|
||||
switch (seed & 0x7)
|
||||
{
|
||||
case 0: /* int */
|
||||
case 1: /* int */
|
||||
case 2: /* int */
|
||||
buf = intpat[(seed >> 3) & 0x3];
|
||||
next = 4;
|
||||
break;
|
||||
case 3: /* float */
|
||||
case 4: /* float */
|
||||
buf = floatpat[(seed >> 3) & 0x3];
|
||||
next = 8;
|
||||
break;
|
||||
case 5: /* scientific */
|
||||
case 6: /* scientific */
|
||||
buf = scipat[(seed >> 3) & 0x3];
|
||||
next = 8;
|
||||
break;
|
||||
case 7: /* invalid */
|
||||
buf = errpat[(seed >> 3) & 0x3];
|
||||
next = 8;
|
||||
break;
|
||||
default: /* Never happen, just to make some compilers happy */
|
||||
break;
|
||||
}
|
||||
}
|
||||
size++;
|
||||
while (total < size)
|
||||
{ /* fill the rest with 0 */
|
||||
*(p + total) = 0;
|
||||
total++;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("State Input: %s\n", start);
|
||||
#endif
|
||||
}
|
||||
|
||||
static ee_u8
|
||||
ee_isdigit(ee_u8 c)
|
||||
{
|
||||
ee_u8 retval;
|
||||
retval = ((c >= '0') & (c <= '9')) ? 1 : 0;
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* Function: core_state_transition
|
||||
Actual state machine.
|
||||
|
||||
The state machine will continue scanning until either:
|
||||
1 - an invalid input is detected.
|
||||
2 - a valid number has been detected.
|
||||
|
||||
The input pointer is updated to point to the end of the token, and the
|
||||
end state is returned (either specific format determined or invalid).
|
||||
*/
|
||||
|
||||
enum CORE_STATE
|
||||
core_state_transition(ee_u8 **instr, ee_u32 *transition_count)
|
||||
{
|
||||
ee_u8 * str = *instr;
|
||||
ee_u8 NEXT_SYMBOL;
|
||||
enum CORE_STATE state = CORE_START;
|
||||
for (; *str && state != CORE_INVALID; str++)
|
||||
{
|
||||
NEXT_SYMBOL = *str;
|
||||
if (NEXT_SYMBOL == ',') /* end of this input */
|
||||
{
|
||||
str++;
|
||||
break;
|
||||
}
|
||||
switch (state)
|
||||
{
|
||||
case CORE_START:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INT;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
|
||||
{
|
||||
state = CORE_S1;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INVALID]++;
|
||||
}
|
||||
transition_count[CORE_START]++;
|
||||
break;
|
||||
case CORE_S1:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INT;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
else if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_S1]++;
|
||||
}
|
||||
break;
|
||||
case CORE_INT:
|
||||
if (NEXT_SYMBOL == '.')
|
||||
{
|
||||
state = CORE_FLOAT;
|
||||
transition_count[CORE_INT]++;
|
||||
}
|
||||
else if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_FLOAT:
|
||||
if (NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e')
|
||||
{
|
||||
state = CORE_S2;
|
||||
transition_count[CORE_FLOAT]++;
|
||||
}
|
||||
else if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_FLOAT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_S2:
|
||||
if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
|
||||
{
|
||||
state = CORE_EXPONENT;
|
||||
transition_count[CORE_S2]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_S2]++;
|
||||
}
|
||||
break;
|
||||
case CORE_EXPONENT:
|
||||
if (ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_SCIENTIFIC;
|
||||
transition_count[CORE_EXPONENT]++;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_EXPONENT]++;
|
||||
}
|
||||
break;
|
||||
case CORE_SCIENTIFIC:
|
||||
if (!ee_isdigit(NEXT_SYMBOL))
|
||||
{
|
||||
state = CORE_INVALID;
|
||||
transition_count[CORE_INVALID]++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
*instr = str;
|
||||
return state;
|
||||
}
|
||||
@@ -0,0 +1,249 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/* Function: get_seed
|
||||
Get a value that cannot be determined at compile time.
|
||||
|
||||
Since different embedded systems and compilers are used, 3 different
|
||||
methods are provided: 1 - Using a volatile variable. This method is only
|
||||
valid if the compiler is forced to generate code that reads the value of a
|
||||
volatile variable from memory at run time. Please note, if using this method,
|
||||
you would need to modify core_portme.c to generate training profile. 2 -
|
||||
Command line arguments. This is the preferred method if command line
|
||||
arguments are supported. 3 - System function. If none of the first 2 methods
|
||||
is available on the platform, a system function which is not a stub can be
|
||||
used.
|
||||
|
||||
e.g. read the value on GPIO pins connected to switches, or invoke
|
||||
special simulator functions.
|
||||
*/
|
||||
#if (SEED_METHOD == SEED_VOLATILE)
|
||||
extern volatile ee_s32 seed1_volatile;
|
||||
extern volatile ee_s32 seed2_volatile;
|
||||
extern volatile ee_s32 seed3_volatile;
|
||||
extern volatile ee_s32 seed4_volatile;
|
||||
extern volatile ee_s32 seed5_volatile;
|
||||
ee_s32
|
||||
get_seed_32(int i)
|
||||
{
|
||||
ee_s32 retval;
|
||||
switch (i)
|
||||
{
|
||||
case 1:
|
||||
retval = seed1_volatile;
|
||||
break;
|
||||
case 2:
|
||||
retval = seed2_volatile;
|
||||
break;
|
||||
case 3:
|
||||
retval = seed3_volatile;
|
||||
break;
|
||||
case 4:
|
||||
retval = seed4_volatile;
|
||||
break;
|
||||
case 5:
|
||||
retval = seed5_volatile;
|
||||
break;
|
||||
default:
|
||||
retval = 0;
|
||||
break;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#elif (SEED_METHOD == SEED_ARG)
|
||||
ee_s32
|
||||
parseval(char *valstring)
|
||||
{
|
||||
ee_s32 retval = 0;
|
||||
ee_s32 neg = 1;
|
||||
int hexmode = 0;
|
||||
if (*valstring == '-')
|
||||
{
|
||||
neg = -1;
|
||||
valstring++;
|
||||
}
|
||||
if ((valstring[0] == '0') && (valstring[1] == 'x'))
|
||||
{
|
||||
hexmode = 1;
|
||||
valstring += 2;
|
||||
}
|
||||
/* first look for digits */
|
||||
if (hexmode)
|
||||
{
|
||||
while (((*valstring >= '0') && (*valstring <= '9'))
|
||||
|| ((*valstring >= 'a') && (*valstring <= 'f')))
|
||||
{
|
||||
ee_s32 digit = *valstring - '0';
|
||||
if (digit > 9)
|
||||
digit = 10 + *valstring - 'a';
|
||||
retval *= 16;
|
||||
retval += digit;
|
||||
valstring++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while ((*valstring >= '0') && (*valstring <= '9'))
|
||||
{
|
||||
ee_s32 digit = *valstring - '0';
|
||||
retval *= 10;
|
||||
retval += digit;
|
||||
valstring++;
|
||||
}
|
||||
}
|
||||
/* now add qualifiers */
|
||||
if (*valstring == 'K')
|
||||
retval *= 1024;
|
||||
if (*valstring == 'M')
|
||||
retval *= 1024 * 1024;
|
||||
|
||||
retval *= neg;
|
||||
return retval;
|
||||
}
|
||||
|
||||
ee_s32
|
||||
get_seed_args(int i, int argc, char *argv[])
|
||||
{
|
||||
if (argc > i)
|
||||
return parseval(argv[i]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#elif (SEED_METHOD == SEED_FUNC)
|
||||
/* If using OS based function, you must define and implement the functions below
|
||||
* in core_portme.h and core_portme.c ! */
|
||||
ee_s32
|
||||
get_seed_32(int i)
|
||||
{
|
||||
ee_s32 retval;
|
||||
switch (i)
|
||||
{
|
||||
case 1:
|
||||
retval = portme_sys1();
|
||||
break;
|
||||
case 2:
|
||||
retval = portme_sys2();
|
||||
break;
|
||||
case 3:
|
||||
retval = portme_sys3();
|
||||
break;
|
||||
case 4:
|
||||
retval = portme_sys4();
|
||||
break;
|
||||
case 5:
|
||||
retval = portme_sys5();
|
||||
break;
|
||||
default:
|
||||
retval = 0;
|
||||
break;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Function: crc*
|
||||
Service functions to calculate 16b CRC code.
|
||||
|
||||
*/
|
||||
ee_u16
|
||||
crcu8(ee_u8 data, ee_u16 crc)
|
||||
{
|
||||
ee_u8 i = 0, x16 = 0, carry = 0;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1));
|
||||
data >>= 1;
|
||||
|
||||
if (x16 == 1)
|
||||
{
|
||||
crc ^= 0x4002;
|
||||
carry = 1;
|
||||
}
|
||||
else
|
||||
carry = 0;
|
||||
crc >>= 1;
|
||||
if (carry)
|
||||
crc |= 0x8000;
|
||||
else
|
||||
crc &= 0x7fff;
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
ee_u16
|
||||
crcu16(ee_u16 newval, ee_u16 crc)
|
||||
{
|
||||
crc = crcu8((ee_u8)(newval), crc);
|
||||
crc = crcu8((ee_u8)((newval) >> 8), crc);
|
||||
return crc;
|
||||
}
|
||||
ee_u16
|
||||
crcu32(ee_u32 newval, ee_u16 crc)
|
||||
{
|
||||
crc = crc16((ee_s16)newval, crc);
|
||||
crc = crc16((ee_s16)(newval >> 16), crc);
|
||||
return crc;
|
||||
}
|
||||
ee_u16
|
||||
crc16(ee_s16 newval, ee_u16 crc)
|
||||
{
|
||||
return crcu16((ee_u16)newval, crc);
|
||||
}
|
||||
|
||||
ee_u8
|
||||
check_data_types()
|
||||
{
|
||||
ee_u8 retval = 0;
|
||||
if (sizeof(ee_u8) != 1)
|
||||
{
|
||||
ee_printf("ERROR: ee_u8 is not an 8b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u16) != 2)
|
||||
{
|
||||
ee_printf("ERROR: ee_u16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s16) != 2)
|
||||
{
|
||||
ee_printf("ERROR: ee_s16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s32) != 4)
|
||||
{
|
||||
ee_printf("ERROR: ee_s32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u32) != 4)
|
||||
{
|
||||
ee_printf("ERROR: ee_u32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_ptr_int) != sizeof(int *))
|
||||
{
|
||||
ee_printf(
|
||||
"ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n");
|
||||
retval++;
|
||||
}
|
||||
if (retval > 0)
|
||||
{
|
||||
ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n");
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
@@ -0,0 +1,184 @@
|
||||
#pragma once
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
Original Author: Shay Gal-on
|
||||
*/
|
||||
|
||||
/* Topic: Description
|
||||
This file contains declarations of the various benchmark functions.
|
||||
*/
|
||||
|
||||
/* Configuration: TOTAL_DATA_SIZE
|
||||
Define total size for data algorithms will operate on
|
||||
*/
|
||||
#ifndef TOTAL_DATA_SIZE
|
||||
#define TOTAL_DATA_SIZE 2 * 1000
|
||||
#endif
|
||||
|
||||
#define SEED_ARG 0
|
||||
#define SEED_FUNC 1
|
||||
#define SEED_VOLATILE 2
|
||||
|
||||
#define MEM_STATIC 0
|
||||
#define MEM_MALLOC 1
|
||||
#define MEM_STACK 2
|
||||
|
||||
#include "core_portme.h"
|
||||
|
||||
#if HAS_STDIO
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#if HAS_PRINTF
|
||||
#define ee_printf printf
|
||||
#endif
|
||||
|
||||
/* Actual benchmark execution in iterate */
|
||||
void *iterate(void *pres);
|
||||
|
||||
/* Typedef: secs_ret
|
||||
For machines that have floating point support, get number of seconds as
|
||||
a double. Otherwise an unsigned int.
|
||||
*/
|
||||
#if HAS_FLOAT
|
||||
typedef double secs_ret;
|
||||
#else
|
||||
typedef ee_u32 secs_ret;
|
||||
#endif
|
||||
|
||||
#if MAIN_HAS_NORETURN
|
||||
#define MAIN_RETURN_VAL
|
||||
#define MAIN_RETURN_TYPE void
|
||||
#else
|
||||
#define MAIN_RETURN_VAL 0
|
||||
#define MAIN_RETURN_TYPE int
|
||||
#endif
|
||||
|
||||
void start_time(void);
|
||||
void stop_time(void);
|
||||
CORE_TICKS get_time(void);
|
||||
secs_ret time_in_secs(CORE_TICKS ticks);
|
||||
|
||||
/* Misc useful functions */
|
||||
ee_u16 crcu8(ee_u8 data, ee_u16 crc);
|
||||
ee_u16 crc16(ee_s16 newval, ee_u16 crc);
|
||||
ee_u16 crcu16(ee_u16 newval, ee_u16 crc);
|
||||
ee_u16 crcu32(ee_u32 newval, ee_u16 crc);
|
||||
ee_u8 check_data_types(void);
|
||||
void * portable_malloc(ee_size_t size);
|
||||
void portable_free(void *p);
|
||||
ee_s32 parseval(char *valstring);
|
||||
|
||||
/* Algorithm IDS */
|
||||
#define ID_LIST (1 << 0)
|
||||
#define ID_MATRIX (1 << 1)
|
||||
#define ID_STATE (1 << 2)
|
||||
#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE)
|
||||
#define NUM_ALGORITHMS 3
|
||||
|
||||
/* list data structures */
|
||||
/* Payload record referenced by a list node (see list_head_s.info):
   two ee_s16 fields. */
typedef struct list_data_s
|
||||
{
|
||||
ee_s16 data16; /* 16-bit data value */
|
||||
ee_s16 idx; /* presumably the element's index/order key - confirm in core_list_join.c */
|
||||
} list_data;
|
||||
|
||||
/* Singly linked list node: a link to the following node plus a pointer
   to the node's list_data_s payload. */
typedef struct list_head_s
|
||||
{
|
||||
struct list_head_s *next; /* following node in the list */
|
||||
struct list_data_s *info; /* payload attached to this node */
|
||||
} list_head;
|
||||
|
||||
/*matrix benchmark related stuff */
|
||||
#define MATDAT_INT 1
|
||||
#if MATDAT_INT
|
||||
typedef ee_s16 MATDAT;
|
||||
typedef ee_s32 MATRES;
|
||||
#else
|
||||
typedef ee_f16 MATDAT;
|
||||
typedef ee_f32 MATRES;
|
||||
#endif
|
||||
|
||||
/* Parameter bundle taken by core_init_matrix() and core_bench_matrix():
   one int and three array pointers (two of element type MATDAT, one of
   the wider result type MATRES). */
typedef struct MAT_PARAMS_S
|
||||
{
|
||||
int N; /* presumably the matrix dimension - confirm in core_matrix.c */
|
||||
MATDAT *A; /* MATDAT array (assumed first input matrix - TODO confirm) */
|
||||
MATDAT *B; /* MATDAT array (assumed second input matrix - TODO confirm) */
|
||||
MATRES *C; /* MATRES array (assumed result matrix - TODO confirm) */
|
||||
} mat_params;
|
||||
|
||||
/* state machine related stuff */
|
||||
/* List of all the possible states for the FSM */
|
||||
typedef enum CORE_STATE
|
||||
{
|
||||
CORE_START = 0,
|
||||
CORE_INVALID,
|
||||
CORE_S1,
|
||||
CORE_S2,
|
||||
CORE_INT,
|
||||
CORE_FLOAT,
|
||||
CORE_EXPONENT,
|
||||
CORE_SCIENTIFIC,
|
||||
NUM_CORE_STATES
|
||||
} core_state_e;
|
||||
|
||||
/* Helper structure to hold results */
|
||||
typedef struct RESULTS_S
|
||||
{
|
||||
/* inputs */
|
||||
ee_s16 seed1; /* Initializing seed */
|
||||
ee_s16 seed2; /* Initializing seed */
|
||||
ee_s16 seed3; /* Initializing seed */
|
||||
void * memblock[4]; /* Pointer to safe memory location */
|
||||
ee_u32 size; /* Size of the data */
|
||||
ee_u32 iterations; /* Number of iterations to execute */
|
||||
ee_u32 execs; /* Bitmask of operations to execute */
|
||||
struct list_head_s *list;
|
||||
mat_params mat;
|
||||
/* outputs */
|
||||
ee_u16 crc;
|
||||
ee_u16 crclist;
|
||||
ee_u16 crcmatrix;
|
||||
ee_u16 crcstate;
|
||||
ee_s16 err;
|
||||
/* Multithread specific */
|
||||
core_portable port;
|
||||
} core_results;
|
||||
|
||||
/* Multicore execution handling */
|
||||
#if (MULTITHREAD > 1)
|
||||
ee_u8 core_start_parallel(core_results *res);
|
||||
ee_u8 core_stop_parallel(core_results *res);
|
||||
#endif
|
||||
|
||||
/* list benchmark functions */
|
||||
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed);
|
||||
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx);
|
||||
|
||||
/* state benchmark functions */
|
||||
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p);
|
||||
ee_u16 core_bench_state(ee_u32 blksize,
|
||||
ee_u8 *memblock,
|
||||
ee_s16 seed1,
|
||||
ee_s16 seed2,
|
||||
ee_s16 step,
|
||||
ee_u16 crc);
|
||||
|
||||
/* matrix benchmark functions */
|
||||
ee_u32 core_init_matrix(ee_u32 blksize,
|
||||
void * memblk,
|
||||
ee_s32 seed,
|
||||
mat_params *p);
|
||||
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc);
|
||||
@@ -0,0 +1,712 @@
|
||||
/*
|
||||
Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
#include <io.h>
|
||||
#include "coremark.h"
|
||||
#include <stdarg.h>
|
||||
|
||||
#define ZEROPAD (1 << 0) /* Pad with zero */
|
||||
#define SIGN (1 << 1) /* Unsigned/signed long */
|
||||
#define PLUS (1 << 2) /* Show plus */
|
||||
#define SPACE (1 << 3) /* Spacer */
|
||||
#define LEFT (1 << 4) /* Left justified */
|
||||
#define HEX_PREP (1 << 5) /* 0x */
|
||||
#define UPPERCASE (1 << 6) /* 'ABCDEF' */
|
||||
|
||||
#define is_digit(c) ((c) >= '0' && (c) <= '9')
|
||||
|
||||
static char * digits = "0123456789abcdefghijklmnopqrstuvwxyz";
|
||||
static char * upper_digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
static ee_size_t strnlen(const char *s, ee_size_t count);
|
||||
|
||||
static ee_size_t
|
||||
strnlen(const char *s, ee_size_t count)
|
||||
{
|
||||
const char *sc;
|
||||
for (sc = s; *sc != '\0' && count--; ++sc)
|
||||
;
|
||||
return sc - s;
|
||||
}
|
||||
|
||||
/*
 * skip_atoi - parse an unsigned decimal number and step past it.
 *
 * s: address of the caller's cursor into the format string. On return the
 *    cursor points at the first character that is not a decimal digit.
 *
 * Returns the parsed value (0 when the cursor was not on a digit).
 */
static int
skip_atoi(const char **s)
{
    int value;

    for (value = 0; is_digit(**s); ++*s)
        value = value * 10 + **s - '0';
    return value;
}
|
||||
|
||||
static char *
|
||||
number(char *str, long num, int base, int size, int precision, int type)
|
||||
{
|
||||
char c, sign, tmp[66];
|
||||
char *dig = digits;
|
||||
int i;
|
||||
|
||||
if (type & UPPERCASE)
|
||||
dig = upper_digits;
|
||||
if (type & LEFT)
|
||||
type &= ~ZEROPAD;
|
||||
if (base < 2 || base > 36)
|
||||
return 0;
|
||||
|
||||
c = (type & ZEROPAD) ? '0' : ' ';
|
||||
sign = 0;
|
||||
if (type & SIGN)
|
||||
{
|
||||
if (num < 0)
|
||||
{
|
||||
sign = '-';
|
||||
num = -num;
|
||||
size--;
|
||||
}
|
||||
else if (type & PLUS)
|
||||
{
|
||||
sign = '+';
|
||||
size--;
|
||||
}
|
||||
else if (type & SPACE)
|
||||
{
|
||||
sign = ' ';
|
||||
size--;
|
||||
}
|
||||
}
|
||||
|
||||
if (type & HEX_PREP)
|
||||
{
|
||||
if (base == 16)
|
||||
size -= 2;
|
||||
else if (base == 8)
|
||||
size--;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
|
||||
if (num == 0)
|
||||
tmp[i++] = '0';
|
||||
else
|
||||
{
|
||||
while (num != 0)
|
||||
{
|
||||
tmp[i++] = dig[((unsigned long)num) % (unsigned)base];
|
||||
num = ((unsigned long)num) / (unsigned)base;
|
||||
}
|
||||
}
|
||||
|
||||
if (i > precision)
|
||||
precision = i;
|
||||
size -= precision;
|
||||
if (!(type & (ZEROPAD | LEFT)))
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
if (sign)
|
||||
*str++ = sign;
|
||||
|
||||
if (type & HEX_PREP)
|
||||
{
|
||||
if (base == 8)
|
||||
*str++ = '0';
|
||||
else if (base == 16)
|
||||
{
|
||||
*str++ = '0';
|
||||
*str++ = digits[33];
|
||||
}
|
||||
}
|
||||
|
||||
if (!(type & LEFT))
|
||||
while (size-- > 0)
|
||||
*str++ = c;
|
||||
while (i < precision--)
|
||||
*str++ = '0';
|
||||
while (i-- > 0)
|
||||
*str++ = tmp[i];
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
static char *
|
||||
eaddr(char *str, unsigned char *addr, int size, int precision, int type)
|
||||
{
|
||||
char tmp[24];
|
||||
char *dig = digits;
|
||||
int i, len;
|
||||
|
||||
if (type & UPPERCASE)
|
||||
dig = upper_digits;
|
||||
len = 0;
|
||||
for (i = 0; i < 6; i++)
|
||||
{
|
||||
if (i != 0)
|
||||
tmp[len++] = ':';
|
||||
tmp[len++] = dig[addr[i] >> 4];
|
||||
tmp[len++] = dig[addr[i] & 0x0F];
|
||||
}
|
||||
|
||||
if (!(type & LEFT))
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
for (i = 0; i < len; ++i)
|
||||
*str++ = tmp[i];
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
static char *
|
||||
iaddr(char *str, unsigned char *addr, int size, int precision, int type)
|
||||
{
|
||||
char tmp[24];
|
||||
int i, n, len;
|
||||
|
||||
len = 0;
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (i != 0)
|
||||
tmp[len++] = '.';
|
||||
n = addr[i];
|
||||
|
||||
if (n == 0)
|
||||
tmp[len++] = digits[0];
|
||||
else
|
||||
{
|
||||
if (n >= 100)
|
||||
{
|
||||
tmp[len++] = digits[n / 100];
|
||||
n = n % 100;
|
||||
tmp[len++] = digits[n / 10];
|
||||
n = n % 10;
|
||||
}
|
||||
else if (n >= 10)
|
||||
{
|
||||
tmp[len++] = digits[n / 10];
|
||||
n = n % 10;
|
||||
}
|
||||
|
||||
tmp[len++] = digits[n];
|
||||
}
|
||||
}
|
||||
|
||||
if (!(type & LEFT))
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
for (i = 0; i < len; ++i)
|
||||
*str++ = tmp[i];
|
||||
while (len < size--)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
#if HAS_FLOAT
|
||||
|
||||
char * ecvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf);
|
||||
char * fcvtbuf(double arg, int ndigits, int *decpt, int *sign, char *buf);
|
||||
static void ee_bufcpy(char *d, char *s, int count);
|
||||
|
||||
/*
 * ee_bufcpy - copy `count` bytes from ps to pd (forward, byte by byte).
 *
 * A zero or negative count copies nothing. parse_float() can compute a
 * negative length, and comparing against an end pointer placed before
 * the start would otherwise never terminate.
 */
void
ee_bufcpy(char *pd, char *ps, int count)
{
    while (count-- > 0)
        *pd++ = *ps++;
}
|
||||
|
||||
/*
 * parse_float - render a double as text in `buffer` (NUL terminated).
 *
 * value:     number to convert; the sign is taken from the converter's
 *            `sign` output.
 * buffer:    destination; must be large enough for the result.
 * fmt:       'e', 'E', 'f', 'g' or 'G'. Upper-case forms select a capital
 *            exponent letter. Any other value produces an empty string.
 * precision: digits after the decimal point ('e'/'f') or number of
 *            significant digits ('g').
 *
 * Relies on ecvtbuf()/fcvtbuf(), which are declared in this file but
 * defined elsewhere; presumably they behave like ecvt/fcvt (digit string
 * plus decimal-point position and sign flag) - confirm against their
 * implementation.
 */
static void
parse_float(double value, char *buffer, char fmt, int precision)
{
    int   decpt, sign, exp, pos;
    char *digs = 0;
    char  cvtbuf[80];
    int   capexp = 0;
    int   magnitude;

    if (fmt == 'G' || fmt == 'E')
    {
        capexp = 1;
        fmt += 'a' - 'A';
    }

    if (fmt == 'g')
    {
        /* ISO C: for %g a precision of zero is treated as 1. Without this
           the 'e' path below would run with precision -1 and copy/advance
           by a negative length. */
        if (precision == 0)
            precision = 1;

        digs      = ecvtbuf(value, precision, &decpt, &sign, cvtbuf);
        magnitude = decpt - 1;
        if (magnitude < -4 || magnitude > precision - 1)
        {
            fmt = 'e';
            precision -= 1;
        }
        else
        {
            fmt = 'f';
            precision -= decpt;
        }
    }

    if (fmt == 'e')
    {
        digs = ecvtbuf(value, precision + 1, &decpt, &sign, cvtbuf);

        if (sign)
            *buffer++ = '-';
        *buffer++ = *digs;
        if (precision > 0)
            *buffer++ = '.';
        ee_bufcpy(buffer, digs + 1, precision);
        buffer += precision;
        *buffer++ = capexp ? 'E' : 'e';

        if (decpt == 0)
        {
            if (value == 0.0)
                exp = 0;
            else
                exp = -1;
        }
        else
            exp = decpt - 1;

        if (exp < 0)
        {
            *buffer++ = '-';
            exp       = -exp;
        }
        else
            *buffer++ = '+';

        /* The exponent is always written as exactly three digits. */
        buffer[2] = (exp % 10) + '0';
        exp       = exp / 10;
        buffer[1] = (exp % 10) + '0';
        exp       = exp / 10;
        buffer[0] = (exp % 10) + '0';
        buffer += 3;
    }
    else if (fmt == 'f')
    {
        digs = fcvtbuf(value, precision, &decpt, &sign, cvtbuf);
        if (sign)
            *buffer++ = '-';
        if (*digs)
        {
            if (decpt <= 0)
            {
                /* |value| < 1: "0." followed by -decpt leading zeros. */
                *buffer++ = '0';
                *buffer++ = '.';
                for (pos = 0; pos < -decpt; pos++)
                    *buffer++ = '0';
                while (*digs)
                    *buffer++ = *digs++;
            }
            else
            {
                /* Insert the point after the first decpt digits. */
                pos = 0;
                while (*digs)
                {
                    if (pos++ == decpt)
                        *buffer++ = '.';
                    *buffer++ = *digs++;
                }
            }
        }
        else
        {
            /* The converter produced no digits: print zero. */
            *buffer++ = '0';
            if (precision > 0)
            {
                *buffer++ = '.';
                for (pos = 0; pos < precision; pos++)
                    *buffer++ = '0';
            }
        }
    }

    *buffer = '\0';
}
|
||||
|
||||
/*
 * decimal_point - make sure the number in `buffer` contains a '.'.
 *
 * If the text already has a '.', it is left alone. Otherwise a '.' is
 * inserted in front of the exponent marker ('e'/'E') when there is one,
 * or appended at the end. The buffer must have room for one more byte.
 */
static void
decimal_point(char *buffer)
{
    while (*buffer)
    {
        if (*buffer == '.')
            return;
        if (*buffer == 'e' || *buffer == 'E')
            break;
        buffer++;
    }

    if (*buffer)
    {
        /* buffer points at the exponent marker. Shift the whole tail,
           marker and terminator included, one place right. The loop must
           reach index 0, otherwise the marker itself is overwritten by
           the '.' instead of being moved. */
        int n = strnlen(buffer, 256);
        while (n >= 0)
        {
            buffer[n + 1] = buffer[n];
            n--;
        }

        *buffer = '.';
    }
    else
    {
        *buffer++ = '.';
        *buffer   = '\0';
    }
}
|
||||
|
||||
/*
 * cropzeros - strip trailing zeros from the fractional part, in place.
 *
 * Zeros at the end of the fraction are removed; if the fraction becomes
 * empty the '.' is removed too. An exponent suffix ("e+003") is kept and
 * moved up against the remaining digits. Text without a '.' is left
 * untouched.
 */
static void
cropzeros(char *buffer)
{
    char *stop;

    while (*buffer && *buffer != '.')
        buffer++;
    if (*buffer++)
    {
        /* Find the end of the fraction: the exponent marker or the NUL. */
        while (*buffer && *buffer != 'e' && *buffer != 'E')
            buffer++;
        stop = buffer--;
        while (*buffer == '0')
            buffer--;
        if (*buffer == '.')
            buffer--;
        /* Slide the tail (exponent, if any, and the terminator) down over
           the removed zeros. Zero-filling up to `stop` instead would also
           wipe the exponent marker and silently drop the exponent. */
        while ((*++buffer = *stop++) != '\0')
            ;
    }
}
|
||||
|
||||
static char *
|
||||
flt(char *str, double num, int size, int precision, char fmt, int flags)
|
||||
{
|
||||
char tmp[80];
|
||||
char c, sign;
|
||||
int n, i;
|
||||
|
||||
// Left align means no zero padding
|
||||
if (flags & LEFT)
|
||||
flags &= ~ZEROPAD;
|
||||
|
||||
// Determine padding and sign char
|
||||
c = (flags & ZEROPAD) ? '0' : ' ';
|
||||
sign = 0;
|
||||
if (flags & SIGN)
|
||||
{
|
||||
if (num < 0.0)
|
||||
{
|
||||
sign = '-';
|
||||
num = -num;
|
||||
size--;
|
||||
}
|
||||
else if (flags & PLUS)
|
||||
{
|
||||
sign = '+';
|
||||
size--;
|
||||
}
|
||||
else if (flags & SPACE)
|
||||
{
|
||||
sign = ' ';
|
||||
size--;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the precision value
|
||||
if (precision < 0)
|
||||
precision = 6; // Default precision: 6
|
||||
|
||||
// Convert floating point number to text
|
||||
parse_float(num, tmp, fmt, precision);
|
||||
|
||||
if ((flags & HEX_PREP) && precision == 0)
|
||||
decimal_point(tmp);
|
||||
if (fmt == 'g' && !(flags & HEX_PREP))
|
||||
cropzeros(tmp);
|
||||
|
||||
n = strnlen(tmp, 256);
|
||||
|
||||
// Output number with alignment and padding
|
||||
size -= n;
|
||||
if (!(flags & (ZEROPAD | LEFT)))
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
if (sign)
|
||||
*str++ = sign;
|
||||
if (!(flags & LEFT))
|
||||
while (size-- > 0)
|
||||
*str++ = c;
|
||||
for (i = 0; i < n; i++)
|
||||
*str++ = tmp[i];
|
||||
while (size-- > 0)
|
||||
*str++ = ' ';
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static int
|
||||
ee_vsprintf(char *buf, const char *fmt, va_list args)
|
||||
{
|
||||
int len;
|
||||
unsigned long num;
|
||||
int i, base;
|
||||
char * str;
|
||||
char * s;
|
||||
|
||||
int flags; // Flags to number()
|
||||
|
||||
int field_width; // Width of output field
|
||||
int precision; // Min. # of digits for integers; max number of chars for
|
||||
// from string
|
||||
int qualifier; // 'h', 'l', or 'L' for integer fields
|
||||
|
||||
for (str = buf; *fmt; fmt++)
|
||||
{
|
||||
if (*fmt != '%')
|
||||
{
|
||||
*str++ = *fmt;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Process flags
|
||||
flags = 0;
|
||||
repeat:
|
||||
fmt++; // This also skips first '%'
|
||||
switch (*fmt)
|
||||
{
|
||||
case '-':
|
||||
flags |= LEFT;
|
||||
goto repeat;
|
||||
case '+':
|
||||
flags |= PLUS;
|
||||
goto repeat;
|
||||
case ' ':
|
||||
flags |= SPACE;
|
||||
goto repeat;
|
||||
case '#':
|
||||
flags |= HEX_PREP;
|
||||
goto repeat;
|
||||
case '0':
|
||||
flags |= ZEROPAD;
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
// Get field width
|
||||
field_width = -1;
|
||||
if (is_digit(*fmt))
|
||||
field_width = skip_atoi(&fmt);
|
||||
else if (*fmt == '*')
|
||||
{
|
||||
fmt++;
|
||||
field_width = va_arg(args, int);
|
||||
if (field_width < 0)
|
||||
{
|
||||
field_width = -field_width;
|
||||
flags |= LEFT;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the precision
|
||||
precision = -1;
|
||||
if (*fmt == '.')
|
||||
{
|
||||
++fmt;
|
||||
if (is_digit(*fmt))
|
||||
precision = skip_atoi(&fmt);
|
||||
else if (*fmt == '*')
|
||||
{
|
||||
++fmt;
|
||||
precision = va_arg(args, int);
|
||||
}
|
||||
if (precision < 0)
|
||||
precision = 0;
|
||||
}
|
||||
|
||||
// Get the conversion qualifier
|
||||
qualifier = -1;
|
||||
if (*fmt == 'l' || *fmt == 'L')
|
||||
{
|
||||
qualifier = *fmt;
|
||||
fmt++;
|
||||
}
|
||||
|
||||
// Default base
|
||||
base = 10;
|
||||
|
||||
switch (*fmt)
|
||||
{
|
||||
case 'c':
|
||||
if (!(flags & LEFT))
|
||||
while (--field_width > 0)
|
||||
*str++ = ' ';
|
||||
*str++ = (unsigned char)va_arg(args, int);
|
||||
while (--field_width > 0)
|
||||
*str++ = ' ';
|
||||
continue;
|
||||
|
||||
case 's':
|
||||
s = va_arg(args, char *);
|
||||
if (!s)
|
||||
s = "<NULL>";
|
||||
len = strnlen(s, precision);
|
||||
if (!(flags & LEFT))
|
||||
while (len < field_width--)
|
||||
*str++ = ' ';
|
||||
for (i = 0; i < len; ++i)
|
||||
*str++ = *s++;
|
||||
while (len < field_width--)
|
||||
*str++ = ' ';
|
||||
continue;
|
||||
|
||||
case 'p':
|
||||
if (field_width == -1)
|
||||
{
|
||||
field_width = 2 * sizeof(void *);
|
||||
flags |= ZEROPAD;
|
||||
}
|
||||
str = number(str,
|
||||
(unsigned long)va_arg(args, void *),
|
||||
16,
|
||||
field_width,
|
||||
precision,
|
||||
flags);
|
||||
continue;
|
||||
|
||||
case 'A':
|
||||
flags |= UPPERCASE;
|
||||
|
||||
case 'a':
|
||||
if (qualifier == 'l')
|
||||
str = eaddr(str,
|
||||
va_arg(args, unsigned char *),
|
||||
field_width,
|
||||
precision,
|
||||
flags);
|
||||
else
|
||||
str = iaddr(str,
|
||||
va_arg(args, unsigned char *),
|
||||
field_width,
|
||||
precision,
|
||||
flags);
|
||||
continue;
|
||||
|
||||
// Integer number formats - set up the flags and "break"
|
||||
case 'o':
|
||||
base = 8;
|
||||
break;
|
||||
|
||||
case 'X':
|
||||
flags |= UPPERCASE;
|
||||
|
||||
case 'x':
|
||||
base = 16;
|
||||
break;
|
||||
|
||||
case 'd':
|
||||
case 'i':
|
||||
flags |= SIGN;
|
||||
|
||||
case 'u':
|
||||
break;
|
||||
|
||||
#if HAS_FLOAT
|
||||
|
||||
case 'f':
|
||||
str = flt(str,
|
||||
va_arg(args, double),
|
||||
field_width,
|
||||
precision,
|
||||
*fmt,
|
||||
flags | SIGN);
|
||||
continue;
|
||||
|
||||
#endif
|
||||
|
||||
default:
|
||||
if (*fmt != '%')
|
||||
*str++ = '%';
|
||||
if (*fmt)
|
||||
*str++ = *fmt;
|
||||
else
|
||||
--fmt;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (qualifier == 'l')
|
||||
num = va_arg(args, unsigned long);
|
||||
else if (flags & SIGN)
|
||||
num = va_arg(args, int);
|
||||
else
|
||||
num = va_arg(args, unsigned int);
|
||||
|
||||
str = number(str, num, base, field_width, precision, flags);
|
||||
}
|
||||
|
||||
*str = '\0';
|
||||
return str - buf;
|
||||
}
|
||||
|
||||
/*
 * uart_send_char - output a single character.
 *
 * This port forwards the character to putchar(). A direct-to-UART
 * variant was left disabled in the original and is kept here for
 * reference:
 *
 *     if (c == '\n')
 *     {
 *         while (io.uart.stat & 1); // uart busy, wait...
 *         io.uart.fifo = '\r';
 *     }
 *     while (io.uart.stat & 1);     // uart busy, wait...
 *     io.uart.fifo = c;
 *
 * Generic porting note from the CoreMark template: output of a char to a
 * UART usually follows this model - wait until the UART is ready, write
 * the char, wait until the UART is done:
 *
 *     while (*UART_CONTROL_ADDRESS != UART_READY);
 *     *UART_DATA_ADDRESS = c;
 *     while (*UART_CONTROL_ADDRESS != UART_READY);
 *
 * Check the UART sample code on your platform or the board documentation.
 * (The template's
 * `#error "You must implement the method uart_send_char to use this file!"`
 * guard is disabled in this port.)
 */
void
uart_send_char(char c)
{
    putchar(c);
}
|
||||
|
||||
/*
 * ee_printf - printf replacement that formats into a local buffer and
 * sends the result one character at a time through uart_send_char().
 *
 * The formatted text must fit in the 1024-byte local buffer, including
 * the terminator; ee_vsprintf() performs no bounds checking.
 *
 * Returns the number of characters sent.
 */
int
ee_printf(const char *fmt, ...)
{
    char    text[1024];
    char *  cursor;
    va_list ap;
    int     sent = 0;

    va_start(ap, fmt);
    ee_vsprintf(text, fmt, ap);
    va_end(ap);

    for (cursor = text; *cursor != '\0'; cursor++, sent++)
        uart_send_char(*cursor);

    return sent;
}
|
||||
@@ -0,0 +1,425 @@
|
||||
/*
|
||||
****************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry.h (part 1 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
* Siemens AG, AUT E 51
|
||||
* Postfach 3220
|
||||
* 8520 Erlangen
|
||||
* Germany (West)
|
||||
* Phone: [+49]-9131-7-20330
|
||||
* (8-17 Central European Time)
|
||||
* Usenet: ..!mcsun!unido!estevax!weicker
|
||||
*
|
||||
* Original Version (in Ada) published in
|
||||
* "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
|
||||
* pp. 1013 - 1030, together with the statistics
|
||||
* on which the distribution of statements etc. is based.
|
||||
*
|
||||
* In this C version, the following C library functions are used:
|
||||
* - strcpy, strcmp (inside the measurement loop)
|
||||
* - printf, scanf (outside the measurement loop)
|
||||
* In addition, Berkeley UNIX system calls "times ()" or "time ()"
|
||||
* are used for execution time measurement. For measurements
|
||||
* on other systems, these calls have to be changed.
|
||||
*
|
||||
* Collection of Results:
|
||||
* Reinhold Weicker (address see above) and
|
||||
*
|
||||
* Rick Richardson
|
||||
* PC Research. Inc.
|
||||
* 94 Apple Orchard Drive
|
||||
* Tinton Falls, NJ 07724
|
||||
* Phone: (201) 389-8963 (9-17 EST)
|
||||
* Usenet: ...!uunet!pcrat!rick
|
||||
*
|
||||
* Please send results to Rick Richardson and/or Reinhold Weicker.
|
||||
* Complete information should be given on hardware and software used.
|
||||
* Hardware information includes: Machine type, CPU, type and size
|
||||
* of caches; for microprocessors: clock frequency, memory speed
|
||||
* (number of wait states).
|
||||
* Software information includes: Compiler (and runtime library)
|
||||
* manufacturer and version, compilation switches, OS version.
|
||||
* The Operating System version may give an indication about the
|
||||
* compiler; Dhrystone itself performs no OS calls in the measurement loop.
|
||||
*
|
||||
* The complete output generated by the program should be mailed
|
||||
* such that at least some checks for correctness can be made.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* History: This version C/2.1 has been made for two reasons:
|
||||
*
|
||||
* 1) There is an obvious need for a common C version of
|
||||
* Dhrystone, since C is at present the most popular system
|
||||
* programming language for the class of processors
|
||||
* (microcomputers, minicomputers) where Dhrystone is used most.
|
||||
* There should be, as far as possible, only one C version of
|
||||
* Dhrystone such that results can be compared without
|
||||
* restrictions. In the past, the C versions distributed
|
||||
* by Rick Richardson (Version 1.1) and by Reinhold Weicker
|
||||
* had small (though not significant) differences.
|
||||
*
|
||||
* 2) As far as it is possible without changes to the Dhrystone
|
||||
* statistics, optimizing compilers should be prevented from
|
||||
* removing significant statements.
|
||||
*
|
||||
* This C version has been developed in cooperation with
|
||||
* Rick Richardson (Tinton Falls, NJ), it incorporates many
|
||||
* ideas from the "Version 1.1" distributed previously by
|
||||
* him over the UNIX network Usenet.
|
||||
* I also thank Chaim Benedelac (National Semiconductor),
|
||||
* David Ditzel (SUN), Earl Killian and John Mashey (MIPS),
|
||||
* Alan Smith and Rafael Saavedra-Barrera (UC at Berkeley)
|
||||
* for their help with comments on earlier versions of the
|
||||
* benchmark.
|
||||
*
|
||||
* Changes: In the initialization part, this version follows mostly
|
||||
* Rick Richardson's version distributed via Usenet, not the
|
||||
* version distributed earlier via floppy disk by Reinhold Weicker.
|
||||
* As a concession to older compilers, names have been made
|
||||
* unique within the first 8 characters.
|
||||
* Inside the measurement loop, this version follows the
|
||||
* version previously distributed by Reinhold Weicker.
|
||||
*
|
||||
* At several places in the benchmark, code has been added,
|
||||
* but within the measurement loop only in branches that
|
||||
* are not executed. The intention is that optimizing compilers
|
||||
* should be prevented from moving code out of the measurement
|
||||
* loop, or from removing code altogether. Since the statements
|
||||
* that are executed within the measurement loop have NOT been
|
||||
* changed, the numbers defining the "Dhrystone distribution"
|
||||
* (distribution of statements, operand types and locality)
|
||||
* still hold. Except for sophisticated optimizing compilers,
|
||||
* execution times for this version should be the same as
|
||||
* for previous versions.
|
||||
*
|
||||
* Since it has proven difficult to subtract the time for the
|
||||
* measurement loop overhead in a correct way, the loop check
|
||||
* has been made a part of the benchmark. This does have
|
||||
* an impact - though a very minor one - on the distribution
|
||||
* statistics which have been updated for this version.
|
||||
*
|
||||
* All changes within the measurement loop are described
|
||||
* and discussed in the companion paper "Rationale for
|
||||
* Dhrystone version 2".
|
||||
*
|
||||
* Because of the self-imposed limitation that the order and
|
||||
* distribution of the executed statements should not be
|
||||
* changed, there are still cases where optimizing compilers
|
||||
* may not generate code for some statements. To a certain
|
||||
* degree, this is unavoidable for small synthetic benchmarks.
|
||||
* Users of the benchmark are advised to check code listings
|
||||
* whether code is generated for all statements of Dhrystone.
|
||||
*
|
||||
* Version 2.1 is identical to version 2.0 distributed via
|
||||
* the UNIX network Usenet in March 1988 except that it corrects
|
||||
* some minor deficiencies that were found by users of version 2.0.
|
||||
* The only change within the measurement loop is that a
|
||||
* non-executed "else" part was added to the "if" statement in
|
||||
* Func_3, and a non-executed "else" part removed from Proc_3.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Defines: The following "Defines" are possible:
|
||||
* -DREG=register (default: Not defined)
|
||||
* As an approximation to what an average C programmer
|
||||
* might do, the "register" storage class is applied
|
||||
* (if enabled by -DREG=register)
|
||||
* - for local variables, if they are used (dynamically)
|
||||
* five or more times
|
||||
* - for parameters if they are used (dynamically)
|
||||
* six or more times
|
||||
* Note that an optimal "register" strategy is
|
||||
* compiler-dependent, and that "register" declarations
|
||||
* do not necessarily lead to faster execution.
|
||||
* -DNOSTRUCTASSIGN (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* assignment of structures.
|
||||
* -DNOENUM (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* enumeration types.
|
||||
* -DTIMES (default)
|
||||
* -DTIME
|
||||
* The "times" function of UNIX (returning process times)
|
||||
* or the "time" function (returning wallclock time)
|
||||
* is used for measurement.
|
||||
* For single user machines, "time ()" is adequate. For
|
||||
* multi-user machines where you cannot get single-user
|
||||
* access, use the "times ()" function. If you have
|
||||
* neither, use a stopwatch in the dead of night.
|
||||
* "printf"s are provided marking the points "Start Timer"
|
||||
* and "Stop Timer". DO NOT use the UNIX "time(1)"
|
||||
* command, as this will measure the total time to
|
||||
* run this program, which will (erroneously) include
|
||||
* the time to allocate storage (malloc) and to perform
|
||||
* the initialization.
|
||||
* -DHZ=nnn
|
||||
* In Berkeley UNIX, the function "times" returns process
|
||||
* time in 1/HZ seconds, with HZ = 60 for most systems.
|
||||
* CHECK YOUR SYSTEM DESCRIPTION BEFORE YOU JUST APPLY
|
||||
* A VALUE.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Compilation model and measurement (IMPORTANT):
|
||||
*
|
||||
* This C version of Dhrystone consists of three files:
|
||||
* - dhry.h (this file, containing global definitions and comments)
|
||||
* - dhry_1.c (containing the code corresponding to Ada package Pack_1)
|
||||
* - dhry_2.c (containing the code corresponding to Ada package Pack_2)
|
||||
*
|
||||
* The following "ground rules" apply for measurements:
|
||||
* - Separate compilation
|
||||
* - No procedure merging
|
||||
* - Otherwise, compiler optimizations are allowed but should be indicated
|
||||
* - Default results are those without register declarations
|
||||
* See the companion paper "Rationale for Dhrystone Version 2" for a more
|
||||
* detailed discussion of these ground rules.
|
||||
*
|
||||
* For 16-Bit processors (e.g. 80186, 80286), times for all compilation
|
||||
* models ("small", "medium", "large" etc.) should be given if possible,
|
||||
* together with a definition of these models for the compiler system used.
|
||||
*
|
||||
**************************************************************************
|
||||
*
|
||||
* Dhrystone (C version) statistics:
|
||||
*
|
||||
* [Comment from the first distribution, updated for version 2.
|
||||
* Note that because of language differences, the numbers are slightly
|
||||
* different from the Ada version.]
|
||||
*
|
||||
* The following program contains statements of a high level programming
|
||||
* language (here: C) in a distribution considered representative:
|
||||
*
|
||||
* assignments 52 (51.0 %)
|
||||
* control statements 33 (32.4 %)
|
||||
* procedure, function calls 17 (16.7 %)
|
||||
*
|
||||
* 103 statements are dynamically executed. The program is balanced with
|
||||
* respect to the three aspects:
|
||||
*
|
||||
* - statement type
|
||||
* - operand type
|
||||
* - operand locality
|
||||
* operand global, local, parameter, or constant.
|
||||
*
|
||||
* The combination of these three aspects is balanced only approximately.
|
||||
*
|
||||
* 1. Statement Type:
|
||||
* ----------------- number
|
||||
*
|
||||
* V1 = V2 9
|
||||
* (incl. V1 = F(..)
|
||||
* V = Constant 12
|
||||
* Assignment, 7
|
||||
* with array element
|
||||
* Assignment, 6
|
||||
* with record component
|
||||
* --
|
||||
* 34 34
|
||||
*
|
||||
* X = Y +|-|"&&"|"|" Z 5
|
||||
* X = Y +|-|"==" Constant 6
|
||||
* X = X +|- 1 3
|
||||
* X = Y *|/ Z 2
|
||||
* X = Expression, 1
|
||||
* two operators
|
||||
* X = Expression, 1
|
||||
* three operators
|
||||
* --
|
||||
* 18 18
|
||||
*
|
||||
* if .... 14
|
||||
* with "else" 7
|
||||
* without "else" 7
|
||||
* executed 3
|
||||
* not executed 4
|
||||
* for ... 7 | counted every time
|
||||
* while ... 4 | the loop condition
|
||||
* do ... while 1 | is evaluated
|
||||
* switch ... 1
|
||||
* break 1
|
||||
* declaration with 1
|
||||
* initialization
|
||||
* --
|
||||
* 34 34
|
||||
*
|
||||
* P (...) procedure call 11
|
||||
* user procedure 10
|
||||
* library procedure 1
|
||||
* X = F (...)
|
||||
* function call 6
|
||||
* user function 5
|
||||
* library function 1
|
||||
* --
|
||||
* 17 17
|
||||
* ---
|
||||
* 103
|
||||
*
|
||||
* The average number of parameters in procedure or function calls
|
||||
* is 1.82 (not counting the function values as implicit parameters).
|
||||
*
|
||||
*
|
||||
* 2. Operators
|
||||
* ------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* Arithmetic 32 50.8
|
||||
*
|
||||
* + 21 33.3
|
||||
* - 7 11.1
|
||||
* * 3 4.8
|
||||
* / (int div) 1 1.6
|
||||
*
|
||||
* Comparison 27 42.8
|
||||
*
|
||||
* == 9 14.3
|
||||
* /= 4 6.3
|
||||
* > 1 1.6
|
||||
* < 3 4.8
|
||||
* >= 1 1.6
|
||||
* <= 9 14.3
|
||||
*
|
||||
* Logic 4 6.3
|
||||
*
|
||||
* && (AND-THEN) 1 1.6
|
||||
* | (OR) 1 1.6
|
||||
* ! (NOT) 2 3.2
|
||||
*
|
||||
* -- -----
|
||||
* 63 100.1
|
||||
*
|
||||
*
|
||||
* 3. Operand Type (counted once per operand reference):
|
||||
* ---------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* Integer 175 72.3 %
|
||||
* Character 45 18.6 %
|
||||
* Pointer 12 5.0 %
|
||||
* String30 6 2.5 %
|
||||
* Array 2 0.8 %
|
||||
* Record 2 0.8 %
|
||||
* --- -------
|
||||
* 242 100.0 %
|
||||
*
|
||||
* When there is an access path leading to the final operand (e.g. a record
|
||||
* component), only the final data type on the access path is counted.
|
||||
*
|
||||
*
|
||||
* 4. Operand Locality:
|
||||
* -------------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* local variable 114 47.1 %
|
||||
* global variable 22 9.1 %
|
||||
* parameter 45 18.6 %
|
||||
* value 23 9.5 %
|
||||
* reference 22 9.1 %
|
||||
* function result 6 2.5 %
|
||||
* constant 55 22.7 %
|
||||
* --- -------
|
||||
* 242 100.0 %
|
||||
*
|
||||
*
|
||||
* The program does not compute anything meaningful, but it is syntactically
|
||||
* and semantically correct. All variables have a value assigned to them
|
||||
* before they are used as a source operand.
|
||||
*
|
||||
* There has been no explicit effort to account for the effects of a
|
||||
* cache, or to balance the use of long or short displacements for code or
|
||||
* data.
|
||||
*
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/* Compiler and system dependent definitions: */
|
||||
|
||||
#ifndef TIME
|
||||
#define TIMES
|
||||
#endif
|
||||
/* Use times(2) time function unless */
|
||||
/* explicitly defined otherwise */
|
||||
|
||||
#ifdef TIMES
|
||||
#include <sys/types.h>
|
||||
#include <sys/times.h>
|
||||
/* for "times" */
|
||||
#endif
|
||||
|
||||
#define Mic_secs_Per_Second 80000000.0
|
||||
/* Berkeley UNIX C returns process times in seconds/HZ */
|
||||
|
||||
#ifdef NOSTRUCTASSIGN
|
||||
#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
|
||||
#else
|
||||
#define structassign(d, s) d = s
|
||||
#endif
|
||||
|
||||
#ifdef NOENUM
|
||||
#define Ident_1 0
|
||||
#define Ident_2 1
|
||||
#define Ident_3 2
|
||||
#define Ident_4 3
|
||||
#define Ident_5 4
|
||||
typedef int Enumeration;
|
||||
#else
|
||||
typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
|
||||
Enumeration;
|
||||
#endif
|
||||
/* for boolean and enumeration types in Ada, Pascal */
|
||||
|
||||
/* General definitions: */
|
||||
|
||||
//#include <stdio.h>
|
||||
/* for strcpy, strcmp */
|
||||
|
||||
#define Null 0
|
||||
/* Value of a Null pointer */
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
typedef int One_Thirty;
|
||||
typedef int One_Fifty;
|
||||
typedef char Capital_Letter;
|
||||
typedef int Boolean;
|
||||
typedef char Str_30 [31];
|
||||
typedef int Arr_1_Dim [50];
|
||||
typedef int Arr_2_Dim [50] [50];
|
||||
|
||||
typedef struct record
|
||||
{
|
||||
struct record *Ptr_Comp;
|
||||
Enumeration Discr;
|
||||
union {
|
||||
struct {
|
||||
Enumeration Enum_Comp;
|
||||
int Int_Comp;
|
||||
char Str_Comp [31];
|
||||
} var_1;
|
||||
struct {
|
||||
Enumeration E_Comp_2;
|
||||
char Str_2_Comp [31];
|
||||
} var_2;
|
||||
struct {
|
||||
char Ch_1_Comp;
|
||||
char Ch_2_Comp;
|
||||
} var_3;
|
||||
} variant;
|
||||
} Rec_Type, *Rec_Pointer;
|
||||
|
||||
|
||||
@@ -0,0 +1,384 @@
|
||||
/*
|
||||
****************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry_1.c (part 2 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
*
|
||||
****************************************************************************
|
||||
*/
|
||||
|
||||
#include "dhry.h"
|
||||
#include <stdint.h>
|
||||
|
||||
/* Global Variables: */
|
||||
|
||||
Rec_Pointer Ptr_Glob,
|
||||
Next_Ptr_Glob;
|
||||
int Int_Glob;
|
||||
Boolean Bool_Glob;
|
||||
char Ch_1_Glob,
|
||||
Ch_2_Glob;
|
||||
int Arr_1_Glob [50];
|
||||
int Arr_2_Glob [50] [50];
|
||||
|
||||
Enumeration Func_1 ();
|
||||
/* forward declaration necessary since Enumeration may not simply be int */
|
||||
|
||||
#ifndef REG
|
||||
Boolean Reg = false;
|
||||
#define REG
|
||||
/* REG becomes defined as empty */
|
||||
/* i.e. no register variables */
|
||||
#else
|
||||
Boolean Reg = true;
|
||||
#endif
|
||||
|
||||
/* variables for time measurement: */
|
||||
extern uint64_t rdcycle();
|
||||
extern uint64_t rdinstret();
|
||||
uint64_t Begin_Time,
|
||||
End_Time,
|
||||
User_Time;
|
||||
uint64_t Begin_Insn,
|
||||
End_Insn,
|
||||
User_Insn;
|
||||
/* end of variables for time measurement */
|
||||
|
||||
|
||||
main ()
|
||||
/*****/
|
||||
|
||||
/* main program, corresponds to procedures */
|
||||
/* Main and Proc_0 in the Ada version */
|
||||
{
|
||||
One_Fifty Int_1_Loc;
|
||||
REG One_Fifty Int_2_Loc;
|
||||
One_Fifty Int_3_Loc;
|
||||
REG char Ch_Index;
|
||||
Enumeration Enum_Loc;
|
||||
Str_30 Str_1_Loc;
|
||||
Str_30 Str_2_Loc;
|
||||
REG int Run_Index;
|
||||
REG int Number_Of_Runs;
|
||||
|
||||
Rec_Type R1,R2;
|
||||
|
||||
/* Initializations */
|
||||
|
||||
|
||||
/*
|
||||
* FEMTOSOC/FEMTORV32 modifications ===========================
|
||||
*/
|
||||
|
||||
/*
|
||||
* Since there are only two calls to malloc(), and that malloc()
|
||||
* is not supported yet by femtosoc lib, I replaced them with
|
||||
* pre-allocated structures.
|
||||
*/
|
||||
Next_Ptr_Glob = &R1; // (Rec_Pointer) malloc (sizeof (Rec_Type));
|
||||
Ptr_Glob = &R2; // (Rec_Pointer) malloc (sizeof (Rec_Type));
|
||||
|
||||
/*
|
||||
* End of FEMTOSOC/FEMTORV32 modifications ======================
|
||||
*/
|
||||
Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
|
||||
Ptr_Glob->Discr = Ident_1;
|
||||
Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
|
||||
Ptr_Glob->variant.var_1.Int_Comp = 40;
|
||||
strcpy (Ptr_Glob->variant.var_1.Str_Comp,
|
||||
"DHRYSTONE PROGRAM, SOME STRING");
|
||||
strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
|
||||
|
||||
Arr_2_Glob [8][7] = 10;
|
||||
/* Was missing in published program. Without this statement, */
|
||||
/* Arr_2_Glob [8][7] would have an undefined value. */
|
||||
/* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
|
||||
/* overflow may occur for this array element. */
|
||||
|
||||
printf ("\n");
|
||||
printf ("Dhrystone Benchmark, Version 2.1 (Language: C)\n");
|
||||
printf ("\n");
|
||||
if (Reg)
|
||||
{
|
||||
printf ("Program compiled with 'register' attribute\n");
|
||||
printf ("\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
printf ("Program compiled without 'register' attribute\n");
|
||||
printf ("\n");
|
||||
}
|
||||
printf ("Please give the number of runs through the benchmark: ");
|
||||
{
|
||||
// int n;
|
||||
// scanf ("%d", &n);
|
||||
Number_Of_Runs = 50000;
|
||||
}
|
||||
printf ("\n");
|
||||
|
||||
printf ("Execution starts, %d runs through Dhrystone\n", Number_Of_Runs);
|
||||
|
||||
/***************/
|
||||
/* Start timer */
|
||||
/***************/
|
||||
|
||||
Begin_Time = rdcycle();
|
||||
Begin_Insn = rdinstret();
|
||||
|
||||
printf(">>> Begin_time=%d\n", (int)Begin_Time);
|
||||
printf(">>> Begin_insn=%d\n", (int)Begin_Insn);
|
||||
|
||||
for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
|
||||
{
|
||||
Proc_5();
|
||||
Proc_4();
|
||||
/* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
|
||||
Int_1_Loc = 2;
|
||||
Int_2_Loc = 3;
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
|
||||
Enum_Loc = Ident_2;
|
||||
Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
|
||||
/* Bool_Glob == 1 */
|
||||
while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
|
||||
{
|
||||
Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
|
||||
/* Int_3_Loc == 7 */
|
||||
Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
|
||||
/* Int_3_Loc == 7 */
|
||||
Int_1_Loc += 1;
|
||||
} /* while */
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
|
||||
/* Int_Glob == 5 */
|
||||
Proc_1 (Ptr_Glob);
|
||||
for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
|
||||
/* loop body executed twice */
|
||||
{
|
||||
if (Enum_Loc == Func_1 (Ch_Index, 'C'))
|
||||
/* then, not executed */
|
||||
{
|
||||
Proc_6 (Ident_1, &Enum_Loc);
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
|
||||
Int_2_Loc = Run_Index;
|
||||
Int_Glob = Run_Index;
|
||||
}
|
||||
}
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Int_2_Loc = Int_2_Loc * Int_1_Loc;
|
||||
Int_1_Loc = Int_2_Loc / Int_3_Loc;
|
||||
Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
|
||||
/* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
|
||||
Proc_2 (&Int_1_Loc);
|
||||
/* Int_1_Loc == 5 */
|
||||
|
||||
} /* loop "for Run_Index" */
|
||||
|
||||
/**************/
|
||||
/* Stop timer */
|
||||
/**************/
|
||||
|
||||
End_Time = rdcycle();
|
||||
End_Insn = rdinstret();
|
||||
|
||||
printf ("Execution ends\n");
|
||||
printf ("\n");
|
||||
printf ("Final values of the variables used in the benchmark:\n");
|
||||
printf ("\n");
|
||||
printf ("Int_Glob: %d\n", Int_Glob);
|
||||
printf (" should be: %d\n", 5);
|
||||
printf ("Bool_Glob: %d\n", Bool_Glob);
|
||||
printf (" should be: %d\n", 1);
|
||||
printf ("Ch_1_Glob: %c\n", Ch_1_Glob);
|
||||
printf (" should be: %c\n", 'A');
|
||||
printf ("Ch_2_Glob: %c\n", Ch_2_Glob);
|
||||
printf (" should be: %c\n", 'B');
|
||||
printf ("Arr_1_Glob[8]: %d\n", Arr_1_Glob[8]);
|
||||
printf (" should be: %d\n", 7);
|
||||
printf ("Arr_2_Glob[8][7]: %d\n", Arr_2_Glob[8][7]);
|
||||
printf (" should be: Number_Of_Runs + 10\n");
|
||||
printf ("Ptr_Glob->\n");
|
||||
printf (" Ptr_Comp: %d\n", (int) Ptr_Glob->Ptr_Comp);
|
||||
printf (" should be: (implementation-dependent)\n");
|
||||
printf (" Discr: %d\n", Ptr_Glob->Discr);
|
||||
printf (" should be: %d\n", 0);
|
||||
printf (" Enum_Comp: %d\n", Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
printf (" should be: %d\n", 2);
|
||||
printf (" Int_Comp: %d\n", Ptr_Glob->variant.var_1.Int_Comp);
|
||||
printf (" should be: %d\n", 17);
|
||||
printf (" Str_Comp: %s\n", Ptr_Glob->variant.var_1.Str_Comp);
|
||||
printf (" should be: DHRYSTONE PROGRAM, SOME STRING\n");
|
||||
printf ("Next_Ptr_Glob->\n");
|
||||
printf (" Ptr_Comp: %d\n", (int) Next_Ptr_Glob->Ptr_Comp);
|
||||
printf (" should be: (implementation-dependent), same as above\n");
|
||||
printf (" Discr: %d\n", Next_Ptr_Glob->Discr);
|
||||
printf (" should be: %d\n", 0);
|
||||
printf (" Enum_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
printf (" should be: %d\n", 1);
|
||||
printf (" Int_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
|
||||
printf (" should be: %d\n", 18);
|
||||
printf (" Str_Comp: %s\n",
|
||||
Next_Ptr_Glob->variant.var_1.Str_Comp);
|
||||
printf (" should be: DHRYSTONE PROGRAM, SOME STRING\n");
|
||||
printf ("Int_1_Loc: %d\n", Int_1_Loc);
|
||||
printf (" should be: %d\n", 5);
|
||||
printf ("Int_2_Loc: %d\n", Int_2_Loc);
|
||||
printf (" should be: %d\n", 13);
|
||||
printf ("Int_3_Loc: %d\n", Int_3_Loc);
|
||||
printf (" should be: %d\n", 7);
|
||||
printf ("Enum_Loc: %d\n", Enum_Loc);
|
||||
printf (" should be: %d\n", 1);
|
||||
printf ("Str_1_Loc: %s\n", Str_1_Loc);
|
||||
printf (" should be: DHRYSTONE PROGRAM, 1'ST STRING\n");
|
||||
printf ("Str_2_Loc: %s\n", Str_2_Loc);
|
||||
printf (" should be: DHRYSTONE PROGRAM, 2'ND STRING\n");
|
||||
printf ("\n");
|
||||
|
||||
User_Time = End_Time - Begin_Time;
|
||||
User_Insn = End_Insn - Begin_Insn;
|
||||
|
||||
printf("Number_Of_Runs: %d\n", Number_Of_Runs);
|
||||
printf("User_Time: %d cycles, %d insn\n", (int)User_Time, (int)User_Insn);
|
||||
|
||||
uint64_t Cycles_Per_Instruction_x1000 = (1000 * User_Time) / User_Insn;
|
||||
printf("Cycles_Per_Instruction: %d.%d%d%d\n",
|
||||
(int)( Cycles_Per_Instruction_x1000 / 1000),
|
||||
(int)((Cycles_Per_Instruction_x1000 / 100 ) % 10),
|
||||
(int)((Cycles_Per_Instruction_x1000 / 10 ) % 10),
|
||||
(int)((Cycles_Per_Instruction_x1000 / 1 ) % 10)
|
||||
);
|
||||
|
||||
show_CPI_2();
|
||||
|
||||
uint64_t Dhrystones_Per_Second_Per_MHz = ((uint64_t)Number_Of_Runs * 1000000) / User_Time;
|
||||
printf("Dhrystones_Per_Second_Per_MHz: %d\n", (int)Dhrystones_Per_Second_Per_MHz);
|
||||
|
||||
/*
|
||||
* "Another common representation of the Dhrystone benchmark is the DMIPS (Dhrystone MIPS) obtained
|
||||
* when the Dhrystone score is divided by 1757 (the number of Dhrystones per second obtained on the
|
||||
* VAX 11/780, nominally a 1 MIPS machine)."
|
||||
*/
|
||||
|
||||
int DMIPS_Per_MHz_x1000 = ((uint64_t)1000 * Dhrystones_Per_Second_Per_MHz) / 1757;
|
||||
printf("DMIPS_Per_MHz: %d.%d%d%d\n",
|
||||
(int)(DMIPS_Per_MHz_x1000 / 1000),
|
||||
(int)((DMIPS_Per_MHz_x1000 / 100) % 10),
|
||||
(int)((DMIPS_Per_MHz_x1000 / 10) % 10),
|
||||
(int)((DMIPS_Per_MHz_x1000 / 1) % 10));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
Proc_1 (Ptr_Val_Par)
|
||||
/******************/
|
||||
|
||||
REG Rec_Pointer Ptr_Val_Par;
|
||||
/* executed once */
|
||||
{
|
||||
REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
|
||||
/* == Ptr_Glob_Next */
|
||||
/* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
|
||||
/* corresponds to "rename" in Ada, "with" in Pascal */
|
||||
|
||||
structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob);
|
||||
Ptr_Val_Par->variant.var_1.Int_Comp = 5;
|
||||
Next_Record->variant.var_1.Int_Comp
|
||||
= Ptr_Val_Par->variant.var_1.Int_Comp;
|
||||
Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
|
||||
Proc_3 (&Next_Record->Ptr_Comp);
|
||||
/* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
|
||||
== Ptr_Glob->Ptr_Comp */
|
||||
if (Next_Record->Discr == Ident_1)
|
||||
/* then, executed */
|
||||
{
|
||||
Next_Record->variant.var_1.Int_Comp = 6;
|
||||
Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
|
||||
&Next_Record->variant.var_1.Enum_Comp);
|
||||
Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
|
||||
Proc_7 (Next_Record->variant.var_1.Int_Comp, 10,
|
||||
&Next_Record->variant.var_1.Int_Comp);
|
||||
}
|
||||
else /* not executed */
|
||||
structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
|
||||
} /* Proc_1 */
|
||||
|
||||
|
||||
Proc_2 (Int_Par_Ref)
|
||||
/******************/
|
||||
/* executed once */
|
||||
/* *Int_Par_Ref == 1, becomes 4 */
|
||||
|
||||
One_Fifty *Int_Par_Ref;
|
||||
{
|
||||
One_Fifty Int_Loc;
|
||||
Enumeration Enum_Loc;
|
||||
|
||||
Int_Loc = *Int_Par_Ref + 10;
|
||||
do /* executed once */
|
||||
if (Ch_1_Glob == 'A')
|
||||
/* then, executed */
|
||||
{
|
||||
Int_Loc -= 1;
|
||||
*Int_Par_Ref = Int_Loc - Int_Glob;
|
||||
Enum_Loc = Ident_1;
|
||||
} /* if */
|
||||
while (Enum_Loc != Ident_1); /* true */
|
||||
} /* Proc_2 */
|
||||
|
||||
|
||||
Proc_3 (Ptr_Ref_Par)
|
||||
/******************/
|
||||
/* executed once */
|
||||
/* Ptr_Ref_Par becomes Ptr_Glob */
|
||||
|
||||
Rec_Pointer *Ptr_Ref_Par;
|
||||
|
||||
{
|
||||
if (Ptr_Glob != Null)
|
||||
/* then, executed */
|
||||
*Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
|
||||
Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
|
||||
} /* Proc_3 */
|
||||
|
||||
|
||||
Proc_4 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Boolean Bool_Loc;
|
||||
|
||||
Bool_Loc = Ch_1_Glob == 'A';
|
||||
Bool_Glob = Bool_Loc | Bool_Glob;
|
||||
Ch_2_Glob = 'B';
|
||||
} /* Proc_4 */
|
||||
|
||||
|
||||
Proc_5 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Ch_1_Glob = 'A';
|
||||
Bool_Glob = false;
|
||||
} /* Proc_5 */
|
||||
|
||||
|
||||
/* Procedure for the assignment of structures, */
|
||||
/* if the C compiler doesn't support this feature */
|
||||
#ifdef NOSTRUCTASSIGN
|
||||
memcpy (d, s, l)
|
||||
register char *d;
|
||||
register char *s;
|
||||
register int l;
|
||||
{
|
||||
while (l--) *d++ = *s++;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
****************************************************************************
|
||||
*
|
||||
* "DHRYSTONE" Benchmark Program
|
||||
* -----------------------------
|
||||
*
|
||||
* Version: C, Version 2.1
|
||||
*
|
||||
* File: dhry_2.c (part 3 of 3)
|
||||
*
|
||||
* Date: May 25, 1988
|
||||
*
|
||||
* Author: Reinhold P. Weicker
|
||||
*
|
||||
****************************************************************************
|
||||
*/
|
||||
|
||||
#include "dhry.h"
|
||||
|
||||
#ifndef REG
|
||||
#define REG
|
||||
/* REG becomes defined as empty */
|
||||
/* i.e. no register variables */
|
||||
#endif
|
||||
|
||||
extern int Int_Glob;
|
||||
extern char Ch_1_Glob;
|
||||
|
||||
|
||||
Proc_6 (Enum_Val_Par, Enum_Ref_Par)
|
||||
/*********************************/
|
||||
/* executed once */
|
||||
/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
|
||||
|
||||
Enumeration Enum_Val_Par;
|
||||
Enumeration *Enum_Ref_Par;
|
||||
{
|
||||
*Enum_Ref_Par = Enum_Val_Par;
|
||||
if (! Func_3 (Enum_Val_Par))
|
||||
/* then, not executed */
|
||||
*Enum_Ref_Par = Ident_4;
|
||||
switch (Enum_Val_Par)
|
||||
{
|
||||
case Ident_1:
|
||||
*Enum_Ref_Par = Ident_1;
|
||||
break;
|
||||
case Ident_2:
|
||||
if (Int_Glob > 100)
|
||||
/* then */
|
||||
*Enum_Ref_Par = Ident_1;
|
||||
else *Enum_Ref_Par = Ident_4;
|
||||
break;
|
||||
case Ident_3: /* executed */
|
||||
*Enum_Ref_Par = Ident_2;
|
||||
break;
|
||||
case Ident_4: break;
|
||||
case Ident_5:
|
||||
*Enum_Ref_Par = Ident_3;
|
||||
break;
|
||||
} /* switch */
|
||||
} /* Proc_6 */
|
||||
|
||||
|
||||
Proc_7 (Int_1_Par_Val, Int_2_Par_Val, Int_Par_Ref)
|
||||
/**********************************************/
|
||||
/* executed three times */
|
||||
/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
|
||||
/* Int_Par_Ref becomes 7 */
|
||||
/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
|
||||
/* Int_Par_Ref becomes 17 */
|
||||
/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
|
||||
/* Int_Par_Ref becomes 18 */
|
||||
One_Fifty Int_1_Par_Val;
|
||||
One_Fifty Int_2_Par_Val;
|
||||
One_Fifty *Int_Par_Ref;
|
||||
{
|
||||
One_Fifty Int_Loc;
|
||||
|
||||
Int_Loc = Int_1_Par_Val + 2;
|
||||
*Int_Par_Ref = Int_2_Par_Val + Int_Loc;
|
||||
} /* Proc_7 */
|
||||
|
||||
|
||||
Proc_8 (Arr_1_Par_Ref, Arr_2_Par_Ref, Int_1_Par_Val, Int_2_Par_Val)
|
||||
/*********************************************************************/
|
||||
/* executed once */
|
||||
/* Int_Par_Val_1 == 3 */
|
||||
/* Int_Par_Val_2 == 7 */
|
||||
Arr_1_Dim Arr_1_Par_Ref;
|
||||
Arr_2_Dim Arr_2_Par_Ref;
|
||||
int Int_1_Par_Val;
|
||||
int Int_2_Par_Val;
|
||||
{
|
||||
REG One_Fifty Int_Index;
|
||||
REG One_Fifty Int_Loc;
|
||||
|
||||
Int_Loc = Int_1_Par_Val + 5;
|
||||
Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
|
||||
Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
|
||||
Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
|
||||
for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
|
||||
Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
|
||||
Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
|
||||
Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
|
||||
Int_Glob = 5;
|
||||
} /* Proc_8 */
|
||||
|
||||
|
||||
Enumeration Func_1 (Ch_1_Par_Val, Ch_2_Par_Val)
|
||||
/*************************************************/
|
||||
/* executed three times */
|
||||
/* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */
|
||||
/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
|
||||
/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
|
||||
|
||||
Capital_Letter Ch_1_Par_Val;
|
||||
Capital_Letter Ch_2_Par_Val;
|
||||
{
|
||||
Capital_Letter Ch_1_Loc;
|
||||
Capital_Letter Ch_2_Loc;
|
||||
|
||||
Ch_1_Loc = Ch_1_Par_Val;
|
||||
Ch_2_Loc = Ch_1_Loc;
|
||||
if (Ch_2_Loc != Ch_2_Par_Val)
|
||||
/* then, executed */
|
||||
return (Ident_1);
|
||||
else /* not executed */
|
||||
{
|
||||
Ch_1_Glob = Ch_1_Loc;
|
||||
return (Ident_2);
|
||||
}
|
||||
} /* Func_1 */
|
||||
|
||||
|
||||
Boolean Func_2 (Str_1_Par_Ref, Str_2_Par_Ref)
|
||||
/*************************************************/
|
||||
/* executed once */
|
||||
/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
|
||||
/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
|
||||
|
||||
Str_30 Str_1_Par_Ref;
|
||||
Str_30 Str_2_Par_Ref;
|
||||
{
|
||||
REG One_Thirty Int_Loc;
|
||||
Capital_Letter Ch_Loc;
|
||||
|
||||
Int_Loc = 2;
|
||||
while (Int_Loc <= 2) /* loop body executed once */
|
||||
if (Func_1 (Str_1_Par_Ref[Int_Loc],
|
||||
Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
|
||||
/* then, executed */
|
||||
{
|
||||
Ch_Loc = 'A';
|
||||
Int_Loc += 1;
|
||||
} /* if, while */
|
||||
if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
|
||||
/* then, not executed */
|
||||
Int_Loc = 7;
|
||||
if (Ch_Loc == 'R')
|
||||
/* then, not executed */
|
||||
return (true);
|
||||
else /* executed */
|
||||
{
|
||||
if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
|
||||
/* then, not executed */
|
||||
{
|
||||
Int_Loc += 7;
|
||||
Int_Glob = Int_Loc;
|
||||
return (true);
|
||||
}
|
||||
else /* executed */
|
||||
return (false);
|
||||
} /* if Ch_Loc */
|
||||
} /* Func_2 */
|
||||
|
||||
|
||||
Boolean Func_3 (Enum_Par_Val)
|
||||
/***************************/
|
||||
/* executed once */
|
||||
/* Enum_Par_Val == Ident_3 */
|
||||
Enumeration Enum_Par_Val;
|
||||
{
|
||||
Enumeration Enum_Loc;
|
||||
|
||||
Enum_Loc = Enum_Par_Val;
|
||||
if (Enum_Loc == Ident_3)
|
||||
/* then, executed */
|
||||
return (true);
|
||||
else /* not executed */
|
||||
return (false);
|
||||
} /* Func_3 */
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
#include <stdint.h>
|
||||
#include <perf.h>
|
||||
|
||||
/* Returns the current value of the cycle counter, as read by rdcycle(). */
uint64_t time() {
    uint64_t cycle_count = rdcycle();
    return cycle_count;
}
|
||||
|
||||
/* Returns the current retired-instruction count, as read by rdinstret(). */
uint64_t insn() {
    uint64_t retired_count = rdinstret();
    return retired_count;
}
|
||||
|
||||
/*
 * Minimal strcpy(): copies the NUL-terminated string src, terminator
 * included, into dest and returns dest. No bounds checking is done.
 */
char *strcpy(char *dest, const char *src) {
    char *out = dest;
    for (;;) {
        char ch = *src++;
        *out++ = ch;
        if (ch == '\0') {
            break;
        }
    }
    return dest;
}
|
||||
|
||||
/*
 * Minimal strcmp(): compares p1 and p2 as unsigned bytes. Returns the
 * difference of the first pair of bytes that differ, or 0 when both
 * strings end at the same position.
 */
int strcmp (const char *p1, const char *p2) {
    const unsigned char *lhs = (const unsigned char *) p1;
    const unsigned char *rhs = (const unsigned char *) p2;
    for (;; ++lhs, ++rhs) {
        unsigned char a = *lhs;
        unsigned char b = *rhs;
        /* stop at the end of p1 or at the first mismatch */
        if (a == '\0' || a != b) {
            return a - b;
        }
    }
}
|
||||
|
||||
/*************************************************************/
|
||||
|
||||
// Prints kx/1000 as a decimal number with exactly three fractional
// digits (e.g. kx = 1234 prints "1.234", kx = 1005 prints "1.005").
void printk(uint64_t kx) {
    int whole  = (int)(kx / 1000);
    int millis = (int)(kx % 1000);
    printf("%d.", whole);
    // Left-pad the fraction with zeros by hand: one "0" if it is below
    // 100, a second one if it is below 10.
    for (int bound = 100; bound >= 10; bound /= 10) {
        if (millis < bound) {
            printf("0");
        }
    }
    printf("%d", millis);
}
|
||||
|
||||
// Prints the cycles-per-instruction ratio computed from the raw
// rdcycle()/rdinstret() counters, followed by the two counter values.
void show_CPI_2() {
    // instret is sampled before cycles, as in the original sequence
    uint64_t retired = rdinstret();
    uint64_t elapsed = rdcycle();
    uint64_t cpi_x1000 = elapsed * 1000 / retired;
    printf(">>> CPI =");
    printk(cpi_x1000);
    printf("\n");
    printf(">>> instret = %d\n", (int)(retired));
    printf(">>> cycles = %d\n", (int)(elapsed));
}
|
||||
@@ -0,0 +1,460 @@
|
||||
/**
|
||||
* ansi_graphics.h
|
||||
* A couple of function to display graphics in the terminal,
|
||||
* using ansi sequences.
|
||||
* Bruno Levy, Jan 2024
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifndef GL_FPS
|
||||
#define GL_FPS 30
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || defined(_WIN32) || defined(__APPLE__)
|
||||
#define BIGCPU // we are compiling for a real machine
|
||||
#else
|
||||
#define TINYCPU // we are compiling for a softcore
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h> // for usleep()
|
||||
#endif
|
||||
|
||||
// You can define GL_width and GL_height before
|
||||
// #including ansi_graphics.h in case the plain
|
||||
// old 80x25 pixels does not suffice.
|
||||
|
||||
#ifndef GL_width
|
||||
#define GL_width 80
|
||||
#endif
|
||||
|
||||
#ifndef GL_height
|
||||
#define GL_height 25
|
||||
#endif
|
||||
|
||||
/**
 * \brief Sets the current graphics position
 * \param[in] x typically in 0,79
 * \param[in] y typically in 0,24
 * \details x and y are 0-based, whereas the ANSI cursor-position
 *  sequence (CSI row ; column H) counts rows and columns from 1, so
 *  both coordinates are shifted by one. Without the shift, coordinate
 *  0 and coordinate 1 land on the same cell.
 */
static inline void GL_gotoxy(int x, int y) {
    printf("\033[%d;%dH", y + 1, x + 1);
}
|
||||
|
||||
/**
 * \brief Draws one "pixel" at the current cursor position
 * \param[in] R , G , B the RGB color of the pixel, in [0..255]
 * \details Sets the background color and prints a space.
 *  Typically used by programs that draw all pixels sequentially,
 *  like a raytracer. After each line, one can either printf("\n") or
 *  call GL_gotoxy(). If you want to draw individual pixels in an
 *  arbitrary order, use GL_setpixelRGB(x,y,R,G,B)
 */
static inline void GL_setpixelRGBhere(uint8_t R, uint8_t G, uint8_t B) {
    int red   = (int)R;
    int green = (int)G;
    int blue  = (int)B;
    // 24-bit background color, then a space painted in that color
    printf("\033[48;2;%d;%d;%dm ", red, green, blue);
}
|
||||
|
||||
|
||||
/**
 * \brief Draws two "pixels" at the current
 *  cursor position and advances the current cursor
 *  position.
 * \param[in] r1 , g1 , b1 color of the upper pixel, in [0..255]
 * \param[in] r2 , g2 , b2 color of the lower pixel, in [0..255]
 * \details Characters are roughly twice as high as wide.
 *  To generate square pixels, this function draws two pixels in
 *  the same character cell: the background color paints the upper
 *  pixel and the foreground color paints the lower one through the
 *  LOWER HALF BLOCK character (U+2584). When both colors are equal,
 *  a single background-colored space is printed instead.
 */
static inline void GL_set2pixelsRGBhere(
    uint8_t r1, uint8_t g1, uint8_t b1,
    uint8_t r2, uint8_t g2, uint8_t b2
) {
    if((r2 == r1) && (g2 == g1) && (b2 == b1)) {
        GL_setpixelRGBhere(r1,g1,b1);
    } else {
        printf("\033[48;2;%d;%d;%dm",(int)r1,(int)g1,(int)b1);
        printf("\033[38;2;%d;%d;%dm",(int)r2,(int)g2,(int)b2);
        // https://www.w3.org/TR/xml-entity-names/025.html
        // https://onlineunicodetools.com/convert-unicode-to-utf8
        // https://copypastecharacter.com/
        // U+2584 LOWER HALF BLOCK in UTF-8. (U+2583, used previously,
        // is the lower three-eighths block and gives unequal pixels.)
        printf("\xE2\x96\x84");
    }
}
|
||||
|
||||
#define GL_RGB(R,G,B) #R ";" #G ";" #B
|
||||
|
||||
/**
 * \brief Draws one "pixel" at the current cursor position, using a
 *  colormap entry.
 * \param[in] cmap array of "R;G;B" strings (see GL_RGB())
 * \param[in] c index of the color in \p cmap
 */
static inline void GL_setpixelIhere(
    const char** cmap, int c
) {
    const char* rgb = cmap[c];
    // set background color, print space
    printf("\033[48;2;%sm ", rgb);
}
|
||||
|
||||
/**
 * \brief Draws two "pixels" in one character cell at the current
 *  cursor position, using colormap entries.
 * \param[in] cmap array of "R;G;B" strings (see GL_RGB())
 * \param[in] c1 colormap index of the upper pixel (background color)
 * \param[in] c2 colormap index of the lower pixel (foreground color)
 * \details The lower pixel is painted with the LOWER HALF BLOCK
 *  character (U+2584). When both indices are equal, a single
 *  background-colored space is printed instead.
 */
static inline void GL_set2pixelsIhere(
    const char** cmap, int c1, int c2
) {
    if(c1 == c2) {
        GL_setpixelIhere(cmap, c1);
    } else {
        printf("\033[48;2;%sm",cmap[c1]);
        printf("\033[38;2;%sm",cmap[c2]);
        // https://www.w3.org/TR/xml-entity-names/025.html
        // https://onlineunicodetools.com/convert-unicode-to-utf8
        // https://copypastecharacter.com/
        // U+2584 LOWER HALF BLOCK in UTF-8. (U+2583, used previously,
        // is the lower three-eighths block and gives unequal pixels.)
        printf("\xE2\x96\x84");
    }
}
|
||||
|
||||
/**
 * \brief Moves the cursor position to the next line.
 * \details Foreground, then background, are set to black before the
 *  line feed is sent.
 */
static inline void GL_newline() {
    printf(
        "\033[38;2;0;0;0m"    // foreground: black
        "\033[48;2;0;0;0m\n"  // background: black, then next line
    );
}
|
||||
|
||||
/**
 * \brief Sets the color of a pixel
 * \param[in] x typically in 0,79
 * \param[in] y typically in 0,24
 * \param[in] R , G , B the RGB color of the pixel, in [0..255]
 * \details Moves the cursor with GL_gotoxy(), then draws with
 *  GL_setpixelRGBhere().
 */
static inline void GL_setpixelRGB(
    int x, int y, uint8_t R, uint8_t G, uint8_t B
) {
    GL_gotoxy(x, y);              // position first ...
    GL_setpixelRGBhere(R, G, B);  // ... then paint one cell
}
|
||||
|
||||
/**
 * \brief Restores default foreground and background colors
 * \details Uses the 256-color palette: background entry 16 (black),
 *  then foreground entry 15 (white).
 */
static inline void GL_restore_default_colors() {
    printf("\033[48;5;16m" "\033[38;5;15m");
}
|
||||
|
||||
/**
 * \brief Call this function each time graphics should be cleared
 * \details Restores the default colors first, then erases the whole
 *  screen.
 */
static inline void GL_clear() {
    GL_restore_default_colors();
    // ANSI "erase in display", whole screen
    printf("\033[2J");
}
|
||||
|
||||
/**
 * \brief Moves current drawing position to top-left corner
 * \details Sends the ANSI cursor-position sequence without arguments.
 * \see GL_setpixelRGBhere() and GL_set2pixelsRGBhere()
 */
static inline void GL_home() {
    printf("\033[H");
}
|
||||
|
||||
/**
 * \brief Call this function before starting drawing graphics
 *  or each time graphics should be cleared
 * \details Hides the cursor, moves it to the top-left corner and
 *  clears the screen.
 */
static inline void GL_init() {
    // hide cursor
    printf("\033[?25l");
    GL_home();
    GL_clear();
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Call this function at the end of the program
|
||||
*/
|
||||
static inline void GL_terminate() {
|
||||
GL_restore_default_colors();
|
||||
GL_gotoxy(0,GL_height);
|
||||
printf("\033[?25h"); // show cursor
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Flushes pending graphic operations and waits a bit
|
||||
*/
|
||||
static inline void GL_swapbuffers() {
|
||||
// only flush if we are on a big machine, with true stdio support
|
||||
// otherwise does nothing (because our small MCU io lib is not buffered)
|
||||
#ifdef BIGCPU
|
||||
fflush(stdout);
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
usleep(1000000/GL_FPS);
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef void (*GL_pixelfunc_RGB)(int x, int y, uint8_t* r, uint8_t* g, uint8_t* b);
|
||||
typedef void (*GL_pixelfunc_RGBf)(int x, int y, float* r, float* g, float* b);
|
||||
|
||||
/**
|
||||
* \brief Draws an image by calling a user-specified function for each pixel.
|
||||
* \param[in] width , height dimension of the image in square pixels
|
||||
* \param[in] do_pixel the user function to be called for each pixel
|
||||
* (a "shader"), that determines the (integer) components r,g,b of
|
||||
* the pixel's color.
|
||||
* \details Uses half-charater pixels.
|
||||
*/
|
||||
static inline void GL_scan_RGB(
|
||||
int width, int height, GL_pixelfunc_RGB do_pixel
|
||||
) {
|
||||
uint8_t r1, g1, b1;
|
||||
uint8_t r2, g2, b2;
|
||||
GL_home();
|
||||
for (int j = 0; j<height; j+=2) {
|
||||
for (int i = 0; i<width; i++) {
|
||||
do_pixel(i,j , &r1, &g1, &b1);
|
||||
do_pixel(i,j+1, &r2, &g2, &b2);
|
||||
GL_set2pixelsRGBhere(r1,g1,b1,r2,g2,b2);
|
||||
if(i == width-1) {
|
||||
GL_newline();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Converts a floating point value to a byte.
 * \param[in] f the floating point value, nominally in [0,1]
 * \return the byte, in [0,255]
 * \details the input value is clamped to [0,1]; a NaN input
 *  returns 0 (converting NaN to an integer is undefined behavior).
 */
static inline uint8_t GL_ftoi(float f) {
    // "not greater than zero" covers negatives, zero and NaN
    if(!(f > 0.0f)) {
        return 0;
    }
    if(f > 1.0f) {
        return 255;
    }
    return (uint8_t)(255.0f * f);
}
|
||||
|
||||
/**
|
||||
* \brief Draws an image by calling a user-specified function for each pixel.
|
||||
* \param[in] width , height dimension of the image in square pixels
|
||||
* \param[in] do_pixel the user function to be called for each pixel
|
||||
* (a "shader"), that determines the (floating-point) components
|
||||
* fr,fg,fb of the pixel's color.
|
||||
* \details Uses half-charater pixels.
|
||||
*/
|
||||
static inline void GL_scan_RGBf(
|
||||
int width, int height, GL_pixelfunc_RGBf do_pixel
|
||||
) {
|
||||
float fr1, fg1, fb1;
|
||||
float fr2, fg2, fb2;
|
||||
uint8_t r1, g1, b1;
|
||||
uint8_t r2, g2, b2;
|
||||
GL_home();
|
||||
for (int j = 0; j<height; j+=2) {
|
||||
for (int i = 0; i<width; i++) {
|
||||
do_pixel(i,j , &fr1, &fg1, &fb1);
|
||||
r1 = GL_ftoi(fr1);
|
||||
g1 = GL_ftoi(fg1);
|
||||
b1 = GL_ftoi(fb1);
|
||||
do_pixel(i,j+1, &fr2, &fg2, &fb2);
|
||||
r2 = GL_ftoi(fr2);
|
||||
g2 = GL_ftoi(fg2);
|
||||
b2 = GL_ftoi(fb2);
|
||||
GL_set2pixelsRGBhere(r1,g1,b1,r2,g2,b2);
|
||||
if(i == width-1) {
|
||||
GL_newline();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
#define INSIDE 0
|
||||
#define LEFT 1
|
||||
#define RIGHT 2
|
||||
#define BOTTOM 4
|
||||
#define TOP 8
|
||||
|
||||
#define XMIN 0
|
||||
#define XMAX (GL_width-1)
|
||||
#define YMIN 0
|
||||
#define YMAX (GL_height-1)
|
||||
|
||||
/*
 * Cohen-Sutherland outcode of point (x,y) relative to the window
 * [XMIN,XMAX] x [YMIN,YMAX]: bit 0 (LEFT) x < XMIN, bit 1 (RIGHT)
 * x > XMAX, bit 2 (BOTTOM) y < YMIN, bit 3 (TOP) y > YMAX.
 * The whole expansion is parenthesized so that the macro can be used
 * safely inside a larger expression.
 */
#define code(x,y) \
    (((x) < XMIN) | (((x) > XMAX)<<1) | (((y) < YMIN)<<2) | (((y) > YMAX)<<3))
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
static inline void GL_line(
|
||||
int x1, int y1, int x2, int y2, int R, int G, int B
|
||||
) {
|
||||
int x,y,dx,dy,sx,sy,tmp;
|
||||
|
||||
/* Cohen-Sutherland line clipping. */
|
||||
int code1 = code(x1,y1);
|
||||
int code2 = code(x2,y2);
|
||||
int codeout;
|
||||
|
||||
for(;;) {
|
||||
/* Both points inside. */
|
||||
if(code1 == 0 && code2 == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* No point inside. */
|
||||
if(code1 & code2) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* One of the points is outside. */
|
||||
codeout = code1 ? code1 : code2;
|
||||
|
||||
/* Compute intersection. */
|
||||
if (codeout & TOP) {
|
||||
x = x1 + (x2 - x1) * (YMAX - y1) / (y2 - y1);
|
||||
y = YMAX;
|
||||
} else if (codeout & BOTTOM) {
|
||||
x = x1 + (x2 - x1) * (YMIN - y1) / (y2 - y1);
|
||||
y = YMIN;
|
||||
} else if (codeout & RIGHT) {
|
||||
y = y1 + (y2 - y1) * (XMAX - x1) / (x2 - x1);
|
||||
x = XMAX;
|
||||
} else if (codeout & LEFT) {
|
||||
y = y1 + (y2 - y1) * (XMIN - x1) / (x2 - x1);
|
||||
x = XMIN;
|
||||
}
|
||||
|
||||
/* Replace outside point with intersection. */
|
||||
if (codeout == code1) {
|
||||
x1 = x;
|
||||
y1 = y;
|
||||
code1 = code(x1,y1);
|
||||
} else {
|
||||
x2 = x;
|
||||
y2 = y;
|
||||
code2 = code(x2,y2);
|
||||
}
|
||||
}
|
||||
|
||||
// Swap both extremities to ensure x increases
|
||||
if(x2 < x1) {
|
||||
tmp = x2;
|
||||
x2 = x1;
|
||||
x1 = tmp;
|
||||
tmp = y2;
|
||||
y2 = y1;
|
||||
y1 = tmp;
|
||||
}
|
||||
|
||||
// Bresenham line drawing.
|
||||
dy = y2 - y1;
|
||||
sy = 1;
|
||||
if(dy < 0) {
|
||||
sy = -1;
|
||||
dy = -dy;
|
||||
}
|
||||
|
||||
dx = x2 - x1;
|
||||
|
||||
x = x1;
|
||||
y = y1;
|
||||
|
||||
if(dy > dx) {
|
||||
int ex = (dx << 1) - dy;
|
||||
for(int u=0; u<dy; u++) {
|
||||
GL_setpixelRGB(x,y,R,G,B);
|
||||
y += sy;
|
||||
if(ex >= 0) {
|
||||
x++;
|
||||
ex -= dy << 1;
|
||||
GL_setpixelRGB(x,y,R,G,B);
|
||||
}
|
||||
while(ex >= 0) {
|
||||
x++;
|
||||
ex -= dy << 1;
|
||||
putchar(' ');
|
||||
}
|
||||
ex += dx << 1;
|
||||
}
|
||||
} else {
|
||||
int ey = (dy << 1) - dx;
|
||||
for(int u=0; u<dx; u++) {
|
||||
GL_setpixelRGB(x,y,R,G,B);
|
||||
x++;
|
||||
while(ey >= 0) {
|
||||
y += sy;
|
||||
ey -= dx << 1;
|
||||
GL_setpixelRGB(x,y,R,G,B);
|
||||
}
|
||||
ey += dy << 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
#ifdef GL_USE_TURTLE
|
||||
|
||||
#include "sintab.h" // Ugly !!!
|
||||
|
||||
/* State of a drawing turtle (position, heading, pen). */
typedef struct {
    int x;        // horizontal position (original note: in [0..79])
    int y;        // vertical position   (original note: in [0..24])
    int angle;    // heading, in degrees
    int R;        // pen color, red component
    int G;        // pen color, green component
    int B;        // pen color, blue component
    int pendown;  // draw while moving if non-zero
} Turtle;
|
||||
|
||||
/*
 * Resets turtle T: centered in the GL_width x GL_height area,
 * heading -90 degrees, pen down, white pen.
 */
static inline void Turtle_init(Turtle* T) {
    /* pen */
    T->pendown = 1;
    T->R = T->G = T->B = 255;

    /* pose */
    T->angle = -90;
    T->x = GL_width/2;
    T->y = GL_height/2;
}
|
||||
|
||||
/* Lifts the pen: subsequent moves of T do not draw. */
static inline void Turtle_pen_up(Turtle* T)
{
    T->pendown = 0;
}
|
||||
|
||||
/* Lowers the pen: subsequent moves of T draw a line. */
static inline void Turtle_pen_down(Turtle* T)
{
    T->pendown = 1;
}
|
||||
|
||||
/* Sets the pen color of T to (R,G,B). */
static inline void Turtle_pen_color(Turtle* T, int R, int G, int B)
{
    T->B = B;
    T->G = G;
    T->R = R;
}
|
||||
|
||||
/*
 * Moves T by `distance` along its current heading and, if the pen is
 * down, draws the travelled segment with the pen color.
 *
 * costab[] / sintab[] come from sintab.h (not visible here); the
 * division by 256 suggests they hold values scaled by 256 and indexed
 * by whole degrees -- TODO confirm against sintab.h.
 */
static inline void Turtle_forward(Turtle* T, int distance) {
    int last_x = T->x;
    int last_y = T->y;

    /*
     * Reduce the heading to [0,359]. A heading of exactly 360 must map
     * to 0 rather than index entry 360 of the tables. C's % keeps the
     * sign of the dividend, hence the fix-up for negative angles.
     */
    int a = T->angle % 360;
    if(a < 0) {
        a += 360;
    }

    T->x += (costab[a] * distance) / 256;
    T->y += (sintab[a] * distance) / 256;

    if(T->pendown) {
        GL_line(last_x, last_y, T->x, T->y, T->R, T->G, T->B);
    }
}
|
||||
|
||||
/* Moves T backwards: a forward move by the negated distance. */
static inline void Turtle_backward(Turtle* T, int distance)
{
    int reversed = -distance;
    Turtle_forward(T, reversed);
}
|
||||
|
||||
/* Adds delta_angle (degrees) to the heading of T. */
static inline void Turtle_turn_right(Turtle* T, int delta_angle)
{
    T->angle = T->angle + delta_angle;
}
|
||||
|
||||
/* Turns T left: a right turn by the negated angle. */
static inline void Turtle_turn_left(Turtle* T, int delta_angle)
{
    int opposite = -delta_angle;
    Turtle_turn_right(T, opposite);
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,80 @@
|
||||
include ../../../FIRMWARE/makefile.inc

# Toolchain flags. ARCH, ABI and the RV* tool variables are not set in
# this file; presumably they come from makefile.inc.
RVASFLAGS=-march=$(ARCH) -mabi=$(ABI)
RVCFLAGS=-I. -O2 -fno-pic -march=$(ARCH) -mabi=$(ABI) -fno-stack-protector -w -Wl,--no-relax

# RAM size in bytes, passed to firmware_words when building the BRAM image.
RAM_SIZE=6144

LIBOBJECTS=putchar.o wait.o print.o memcpy.o errno.o perf.o

# BRAM (uses bram.ld)

%.bram.elf: %.o start.o $(LIBOBJECTS) $(RV_BINARIES)
	$(RVLD) -T bram.ld -m elf32lriscv -nostdlib -norelax \
	    $< $(LIBOBJECTS) $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@

# ELF -> hex image, also copied next to the design and into obj_dir.
%.hex: %.elf $(FIRMWARE_DIR)/TOOLS/firmware_words
	$(FIRMWARE_DIR)/TOOLS/firmware_words $< -ram $(RAM_SIZE) -max_addr $(RAM_SIZE) -out $@
	cp $@ ../firmware.hex
	mkdir -p ../obj_dir
	cp $@ ../obj_dir/firmware.hex
	echo $@ > ../firmware.txt

# SPI FLASH 0 (sends everything to SPI flash)

%.spiflash0.elf: %.o start.o $(LIBOBJECTS) $(RV_BINARIES)
	$(RVLD) -T spiflash0.ld -m elf32lriscv -nostdlib -norelax \
	    $< $(LIBOBJECTS) $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@

%.spiflash0.bin: %.spiflash0.elf
	$(RVOBJCOPY) $< $@ -O binary

%.spiflash0.prog: %.spiflash0.bin
	iceprog -o 128k $<

# SPI FLASH 1 (sends code and variables initialization to SPI flash, variables to RAM)

%.spiflash1.elf: %.o start_spiflash1.o $(LIBOBJECTS) $(RV_BINARIES)
	$(RVLD) -T spiflash1.ld -m elf32lriscv -nostdlib -norelax \
	    $< $(LIBOBJECTS) $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@

%.spiflash1.bin: %.spiflash1.elf
	$(RVOBJCOPY) $< $@ -O binary

%.spiflash1.prog: %.spiflash1.bin
	iceprog -o 128k $<

# SPI FLASH 2 (sends code and variables initialization to SPI flash, variables and fastcode to RAM)

%.spiflash2.elf: %.o start_spiflash1.o $(LIBOBJECTS) $(RV_BINARIES)
	$(RVLD) -T spiflash2.ld -m elf32lriscv -nostdlib -norelax \
	    $< $(LIBOBJECTS) -L$(RVTOOLCHAIN_LIB_DIR) -lm $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@

%.spiflash2.bin: %.spiflash2.elf
	$(RVOBJCOPY) $< $@ -O binary

%.spiflash2.prog: %.spiflash2.bin
	iceprog -o 128k $<

%.spiflash2.list: %.spiflash2.elf
	$(RVOBJDUMP) -Mnumeric -D $< > $@

# DUAL MEMORY (64 kb program ROM, 64 kb data RAM)

%.pipeline.elf: %.o start_pipeline.o $(LIBOBJECTS) $(RV_BINARIES)
	$(RVLD) -T pipeline.ld -m elf32lriscv -nostdlib -norelax \
	    $< $(LIBOBJECTS) -L$(RVTOOLCHAIN_LIB_DIR) -lm $(RVTOOLCHAIN_GCC_LIB_DIR)/libgcc.a -o $@
	$(RVOBJDUMP) -Mnumeric -D $@ > $@.list

# Program ROM image: addresses 0 .. 0xFFFF of the linked ELF.
%.PROGROM.hex: %.pipeline.elf $(FIRMWARE_DIR)/TOOLS/firmware_words
	$(FIRMWARE_DIR)/TOOLS/firmware_words $< -ram 0x20000 -max_addr 0x20000 -out $@ -from_addr 0 -to_addr 0xFFFF
	cp $@ ../PROGROM.hex
	mkdir -p ../obj_dir
	cp $@ ../obj_dir/PROGROM.hex

# Data RAM image: addresses 0x10000 .. 0x1FFFF of the linked ELF.
%.DATARAM.hex: %.pipeline.elf $(FIRMWARE_DIR)/TOOLS/firmware_words
	$(FIRMWARE_DIR)/TOOLS/firmware_words $< -ram 0x20000 -max_addr 0x20000 -out $@ -from_addr 0x10000 -to_addr 0x1FFFF
	cp $@ ../DATARAM.hex
	mkdir -p ../obj_dir
	cp $@ ../obj_dir/DATARAM.hex

# Builds both images, then records the target name in ../firmware.txt.
%.pipeline.hex: %.PROGROM.hex %.DATARAM.hex
	echo $@ > ../firmware.txt
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,480 @@
|
||||
/*
|
||||
* Reading the ST-NICCC megademo data stored in
|
||||
* the SPI flash and streaming it to polygons,
|
||||
* rendered as ANSI character sequences through
|
||||
* the UART.
|
||||
*
|
||||
* The polygon stream is a 640K file, that needs
|
||||
* to be stored in the SPI flash, using:
|
||||
* ICEStick: iceprog -o 1M EXAMPLES/DATA/scene1.dat
|
||||
* ULX3S: cp EXAMPLES/DATA/scene1.dat scene1.img
|
||||
* ujprog -j flash -f 1048576 scene1.img
|
||||
* (using latest version of ujprog compiled from https://github.com/kost/fujprog)
|
||||
*
|
||||
* More details and links in EXAMPLES/DATA/notes.txt
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#include "io.h"
|
||||
#endif
|
||||
|
||||
// when compiling for SPI flash, uncomment to fit some routines in fast BRAM
|
||||
// (but it does not change much: the bottleneck is ANSI RGB encoding and the UART).
|
||||
//#define RV32_FASTCODE __attribute((section(".fastcode")))
|
||||
#define RV32_FASTCODE
|
||||
|
||||
// when compiling for SPI flash, uncomment to enable wireframe mode (but it is ugly
|
||||
// and it will not fit in BRAM !)
|
||||
// #define WITH_WIREFRAME
|
||||
|
||||
#ifdef WITH_WIREFRAME
|
||||
int wireframe = 0;
|
||||
#endif
|
||||
|
||||
#define MIN(x,y) ((x) < (y) ? (x) : (y))
|
||||
#define MAX(x,y) ((x) > (y) ? (x) : (y))
|
||||
|
||||
|
||||
/**********************************************************************************/
|
||||
/* Graphics routines */
|
||||
/**********************************************************************************/
|
||||
|
||||
|
||||
// Map coordinates from file to screen
|
||||
|
||||
/* Maps an x coordinate read from the file to a screen column (halved). */
static inline uint8_t map_x(uint8_t x)
{
    uint8_t column = (uint8_t)(x / 2);
    return column;
}
|
||||
|
||||
/* Maps a y coordinate read from the file to a screen row (divided by 4). */
static inline uint8_t map_y(uint8_t y)
{
    uint8_t row = (uint8_t)(y / 4);
    return row;
}
|
||||
|
||||
/*
 * Clears the terminal: selects color 16 of the 256-color palette as
 * background (black), then erases the whole screen.
 */
void GL_clear()
{
    /* "ESC[48;5;16m" = set background, "ESC[2J" = clear screen */
    printf("\033[48;5;16m\033[2J");
}
|
||||
|
||||
/*
|
||||
* Set background color using 6x6x6 colorcube codes
|
||||
* see https://stackoverflow.com/questions/4842424/list-of-ansi-color-escape-sequences
|
||||
*/
|
||||
/*
 * Selects `color` (a 256-color palette index) as background color.
 * The escape sequence is only emitted when the color differs from the
 * one requested by the previous call.
 */
static inline void GL_setcolor(int color)
{
    static int current = -1;   /* color requested by the previous call */

    if(color == current) {
        return;
    }
    printf("\033[48;5;%dm",color);
    current = color;
}
|
||||
|
||||
/*
 * Plots one character cell: moves the cursor to row y, column x and
 * prints a space (which shows the current background color).
 */
static inline void GL_setpixel(int x, int y)
{
    const int row = y;
    const int column = x;
    printf("\033[%d;%dH ",row,column);
}
|
||||
|
||||
#ifdef WITH_WIREFRAME
|
||||
void GL_line(int x1, int y1, int x2, int y2) RV32_FASTCODE;
|
||||
void GL_line(int x1, int y1, int x2, int y2) {
|
||||
int x,y,dx,dy,sy,tmp;
|
||||
|
||||
// Swap both extremities to ensure x increases
|
||||
if(x2 < x1) {
|
||||
tmp = x2;
|
||||
x2 = x1;
|
||||
x1 = tmp;
|
||||
tmp = y2;
|
||||
y2 = y1;
|
||||
y1 = tmp;
|
||||
}
|
||||
|
||||
// Bresenham line drawing.
|
||||
dy = y2 - y1;
|
||||
sy = 1;
|
||||
if(dy < 0) {
|
||||
sy = -1;
|
||||
dy = -dy;
|
||||
}
|
||||
|
||||
dx = x2 - x1;
|
||||
|
||||
x = x1;
|
||||
y = y1;
|
||||
|
||||
if(dy > dx) {
|
||||
int ex = (dx << 1) - dy;
|
||||
for(int u=0; u<dy; u++) {
|
||||
GL_setpixel(x,y);
|
||||
y += sy;
|
||||
if(ex >= 0) {
|
||||
x++;
|
||||
ex -= dy << 1;
|
||||
GL_setpixel(x,y);
|
||||
}
|
||||
while(ex >= 0) {
|
||||
x++;
|
||||
ex -= dy << 1;
|
||||
putchar(' ');
|
||||
}
|
||||
ex += dx << 1;
|
||||
}
|
||||
} else {
|
||||
int ey = (dy << 1) - dx;
|
||||
for(int u=0; u<dx; u++) {
|
||||
GL_setpixel(x,y);
|
||||
x++;
|
||||
while(ey >= 0) {
|
||||
y += sy;
|
||||
ey -= dx << 1;
|
||||
GL_setpixel(x,y);
|
||||
}
|
||||
ey += dy << 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void GL_fillpoly(int nb_pts, int* points) RV32_FASTCODE;
|
||||
void GL_fillpoly(int nb_pts, int* points) {
|
||||
static int last_color = -1;
|
||||
|
||||
char x_left[128];
|
||||
char x_right[128];
|
||||
|
||||
/* Determine clockwise, miny, maxy */
|
||||
int clockwise = 0;
|
||||
int miny = 256;
|
||||
int maxy = -256;
|
||||
|
||||
for(int i1=0; i1<nb_pts; ++i1) {
|
||||
int i2=(i1==nb_pts-1) ? 0 : i1+1;
|
||||
int i3=(i2==nb_pts-1) ? 0 : i2+1;
|
||||
int x1 = points[2*i1];
|
||||
int y1 = points[2*i1+1];
|
||||
int dx1 = points[2*i2] - x1;
|
||||
int dy1 = points[2*i2+1] - y1;
|
||||
int dx2 = points[2*i3] - x1;
|
||||
int dy2 = points[2*i3+1] - y1;
|
||||
clockwise += dx1 * dy2 - dx2 * dy1;
|
||||
miny = MIN(miny,y1);
|
||||
maxy = MAX(maxy,y1);
|
||||
}
|
||||
|
||||
/* Determine x_left and x_right for each scaline */
|
||||
for(int i1=0; i1<nb_pts; ++i1) {
|
||||
int i2=(i1==nb_pts-1) ? 0 : i1+1;
|
||||
|
||||
int x1 = points[2*i1];
|
||||
int y1 = points[2*i1+1];
|
||||
int x2 = points[2*i2];
|
||||
int y2 = points[2*i2+1];
|
||||
|
||||
#ifdef WITH_WIREFRAME
|
||||
if(wireframe) {
|
||||
if((clockwise > 0) ^ (y2 > y1)) {
|
||||
GL_line(x1,y1,x2,y2);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
char* x_buffer = ((clockwise > 0) ^ (y2 > y1)) ? x_left : x_right;
|
||||
int dx = x2 - x1;
|
||||
int sx = 1;
|
||||
int dy = y2 - y1;
|
||||
int sy = 1;
|
||||
int x = x1;
|
||||
int y = y1;
|
||||
int ex;
|
||||
|
||||
if(dx < 0) {
|
||||
sx = -1;
|
||||
dx = -dx;
|
||||
}
|
||||
|
||||
if(dy < 0) {
|
||||
sy = -1;
|
||||
dy = -dy;
|
||||
}
|
||||
|
||||
if(y1 == y2) {
|
||||
x_left[y1] = MIN(x1,x2);
|
||||
x_right[y1] = MAX(x1,x2);
|
||||
continue;
|
||||
}
|
||||
|
||||
ex = (dx << 1) - dy;
|
||||
|
||||
for(int u=0; u <= dy; ++u) {
|
||||
x_buffer[y] = x;
|
||||
y += sy;
|
||||
while(ex >= 0) {
|
||||
x += sx;
|
||||
ex -= dy << 1;
|
||||
}
|
||||
ex += dx << 1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WITH_WIREFRAME
|
||||
if(!wireframe)
|
||||
#endif
|
||||
{
|
||||
for(int y = miny; y <= maxy; ++y) {
|
||||
int x1 = x_left[y];
|
||||
int x2 = x_right[y];
|
||||
printf("\033[%d;%dH",y,x1); // Goto_XY(x1,y)
|
||||
for(int x=x1; x<x2; ++x) {
|
||||
putchar(' ');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************************/
|
||||
|
||||
/*
|
||||
* Starting address of data stream stored in the
|
||||
* SPI.
|
||||
* I put the data stream starting from 1M offset,
|
||||
* just to make sure it does not collide with
|
||||
* FPGA wiring configuration ! (but FPGA configuration
|
||||
 * only takes a few tens of kilobytes, I think).
|
||||
* Using the IO interface, it is using the physical address
|
||||
* (starting at 1M). Using the mapped memory interface,
|
||||
* SPI_FLASH_BASE is mapped to 1M.
|
||||
*/
|
||||
uint32_t spi_addr = 0;
|
||||
|
||||
/*
|
||||
* Word address and cached word used in mapped mode
|
||||
*/
|
||||
uint32_t spi_word_addr = 0;
|
||||
union {
|
||||
uint32_t spi_word;
|
||||
uint8_t spi_bytes[4];
|
||||
} spi_u;
|
||||
|
||||
/* Offset (1M) of the data stream; parenthesized so it is safe in any expression. */
#define ADDR_OFFSET (1024*1024)
|
||||
|
||||
/*
|
||||
* Restarts reading from the beginning of the stream.
|
||||
*/
|
||||
void spi_reset() {
|
||||
spi_addr = ADDR_OFFSET;
|
||||
spi_word_addr = (uint32_t)(-1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
FILE* f = NULL;
|
||||
|
||||
/**
|
||||
* Reads one byte of data from the file (emulates read_spi_byte() when running on desktop)
|
||||
*/
|
||||
uint8_t next_spi_byte() {
|
||||
uint8_t result;
|
||||
if(f == NULL) {
|
||||
f = fopen("../../../FIRMWARE/EXAMPLES/DATA/scene1.dat","rb");
|
||||
if(f == NULL) {
|
||||
printf("Could not open data file\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
if(spi_word_addr != spi_addr >> 2) {
|
||||
spi_word_addr = spi_addr >> 2;
|
||||
fseek(f, spi_word_addr*4-ADDR_OFFSET, SEEK_SET);
|
||||
fread(&(spi_u.spi_word), 4, 1, f);
|
||||
}
|
||||
result = spi_u.spi_bytes[spi_addr&3];
|
||||
++spi_addr;
|
||||
return (uint8_t)(result);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
|
||||
# define SPI_FLASH_BASE ((uint32_t*)(1 << 23))
|
||||
|
||||
/**
|
||||
* Reads one byte from the SPI flash, using the mapped SPI flash interface.
|
||||
*/
|
||||
static inline uint8_t next_spi_byte() {
|
||||
uint8_t result;
|
||||
if(spi_word_addr != spi_addr >> 2) {
|
||||
spi_word_addr = spi_addr >> 2;
|
||||
spi_u.spi_word = SPI_FLASH_BASE[spi_word_addr];
|
||||
}
|
||||
result = spi_u.spi_bytes[spi_addr&3];
|
||||
++spi_addr;
|
||||
return (uint8_t)(result);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Reads a 16-bit word from the stream. In the ST-NICCC file, words are
 * stored in big endian format (see DATA/scene_description.txt): the
 * first byte read is the most significant one.
 */
static inline uint16_t next_spi_word()
{
    uint16_t word = (uint16_t)next_spi_byte();               /* high byte */
    word = (uint16_t)((word << 8) | (uint16_t)next_spi_byte()); /* low byte  */
    return word;
}
|
||||
|
||||
/*
|
||||
* The colormap, encoded in such a way that it
|
||||
* can be directly sent as ANSI color codes.
|
||||
*/
|
||||
int cmap[16];
|
||||
|
||||
/*
 * Current frame's vertices coordinates (if frame is indexed),
 * already mapped to screen coordinates by map_x() / map_y().
 * Polygons refer to these vertices through a one-byte index read from
 * the stream, so all 256 possible index values must be addressable.
 */
uint8_t X[256];
uint8_t Y[256];
|
||||
|
||||
/*
|
||||
* Current polygon vertices, as expected
|
||||
* by GL_fillpoly():
|
||||
* xi = poly[2*i], yi = poly[2*i+1]
|
||||
*/
|
||||
int poly[30];
|
||||
|
||||
/*
|
||||
* Masks for frame flags.
|
||||
*/
|
||||
#define CLEAR_BIT 1
|
||||
#define PALETTE_BIT 2
|
||||
#define INDEXED_BIT 4
|
||||
|
||||
/*
|
||||
* Reads a frame's polygonal description from
|
||||
* SPI flash and rasterizes the polygons using
|
||||
* FemtoGL.
|
||||
* returns 0 if last frame.
|
||||
* See DATA/scene_description.txt for the
|
||||
* ST-NICCC file format.
|
||||
* See DATA/test_ST_NICCC.c for an example
|
||||
* program.
|
||||
*/
|
||||
int read_frame() RV32_FASTCODE;
|
||||
int read_frame() {
|
||||
uint8_t frame_flags = next_spi_byte();
|
||||
|
||||
// Update palette data.
|
||||
if(frame_flags & PALETTE_BIT) {
|
||||
uint16_t colors = next_spi_word();
|
||||
for(int b=15; b>=0; --b) {
|
||||
if(colors & (1 << b)) {
|
||||
int rgb = next_spi_word();
|
||||
|
||||
// Get the three 3-bits per component R,G,B
|
||||
int b3 = (rgb & 0x007);
|
||||
int g3 = (rgb & 0x070) >> 4;
|
||||
int r3 = (rgb & 0x700) >> 8;
|
||||
|
||||
// Re-encode them as ANSI 8-bits color
|
||||
b3 = b3 * 6 / 8;
|
||||
g3 = g3 * 6 / 8;
|
||||
r3 = r3 * 6 / 8;
|
||||
cmap[15-b] = 16 + b3 + 6*(g3 + 6*r3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(frame_flags & CLEAR_BIT) {
|
||||
// GL_clear();
|
||||
}
|
||||
|
||||
// Update vertices
|
||||
if(frame_flags & INDEXED_BIT) {
|
||||
uint8_t nb_vertices = next_spi_byte();
|
||||
for(int v=0; v<nb_vertices; ++v) {
|
||||
X[v] = map_x(next_spi_byte());
|
||||
Y[v] = map_y(next_spi_byte());
|
||||
}
|
||||
}
|
||||
|
||||
// Draw frame's polygons
|
||||
for(;;) {
|
||||
uint8_t poly_desc = next_spi_byte();
|
||||
|
||||
// Special polygon codes (end of frame,
|
||||
// seek next block, end of stream)
|
||||
|
||||
if(poly_desc == 0xff) {
|
||||
break; // end of frame
|
||||
}
|
||||
if(poly_desc == 0xfe) {
|
||||
// Go to next 64kb block
|
||||
spi_addr -= ADDR_OFFSET;
|
||||
spi_addr &= ~65535;
|
||||
spi_addr += 65536;
|
||||
spi_addr += ADDR_OFFSET;
|
||||
return 1;
|
||||
}
|
||||
if(poly_desc == 0xfd) {
|
||||
return 0; // end of stream
|
||||
}
|
||||
|
||||
uint8_t nvrtx = poly_desc & 15;
|
||||
uint8_t poly_col = poly_desc >> 4;
|
||||
for(int i=0; i<nvrtx; ++i) {
|
||||
if(frame_flags & INDEXED_BIT) {
|
||||
uint8_t index = next_spi_byte();
|
||||
poly[2*i] = X[index];
|
||||
poly[2*i+1] = Y[index];
|
||||
} else {
|
||||
poly[2*i] = map_x(next_spi_byte());
|
||||
poly[2*i+1] = map_y(next_spi_byte());
|
||||
}
|
||||
}
|
||||
GL_setcolor(cmap[poly_col]);
|
||||
GL_fillpoly(nvrtx,poly);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Plays the polygon stream in an endless loop: rewinds the stream,
 * renders frames until read_frame() reports the end, and starts over.
 * On the device the frame counter is shown on the LEDs; on Linux a
 * 20 ms pause follows each frame instead.
 */
int main() {
    // printf("\x1B[?25l"); // hide cursor
    int frame;

#ifndef __linux__
    IO_OUT(IO_LEDS,15);
#endif
    printf("starting\n");

#ifdef WITH_WIREFRAME
    wireframe = 0;
#endif
    GL_clear();

    for(;;) {
        spi_reset();
        for(frame = 0; read_frame(); ++frame) {
#ifdef WITH_WIREFRAME
            if(wireframe) {
                GL_clear();
            }
#endif
#ifdef __linux__
            usleep(20000);
#else
            IO_OUT(IO_LEDS,frame);
#endif
        }
#ifdef WITH_WIREFRAME
        /* alternate filled / wireframe rendering on each pass */
        wireframe = !wireframe;
#endif
    }
}
|
||||
@@ -0,0 +1,21 @@
|
||||
# Simple blinker (RISC-V, GNU as syntax).
# Alternates two values on the LEDs forever, calling wait between them.
# Stores are gp-relative: gp is presumably set to IO_BASE by the
# startup code (not visible here) -- TODO confirm. `wait` is an
# external routine.

        .equ    IO_BASE, 0x400000
        .equ    IO_LEDS, 4              # offset of the LEDs register

        .section .text

        .globl  main

main:
.Lblink:
        li      t0, 5                   # first pattern:  0b0101
        sw      t0, IO_LEDS(gp)
        call    wait
        li      t0, 10                  # second pattern: 0b1010
        sw      t0, IO_LEDS(gp)
        call    wait
        j       .Lblink                 # never returns
|
||||
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
/* One memory region: code and data all go to the same block RAM. */
MEMORY
{
    BRAM (RWX) : ORIGIN = 0x0000, LENGTH = 0x1800  /* 6kB RAM */
}

SECTIONS
{
    /* A single output section that collects every input section. */
    everything :
    {
        . = ALIGN(4);
        start.o (.text)   /* code of start.o comes first, at ORIGIN */
        *(.*)             /* then all sections of all input files    */
    } >BRAM
}
|
||||
@@ -0,0 +1,7 @@
|
||||
#define RISCV
|
||||
#define TIME
|
||||
#define USE_MYSTDLIB
|
||||
|
||||
#include "DHRYSTONE/dhry_1.c"
|
||||
#include "DHRYSTONE/dhry_2.c"
|
||||
#include "DHRYSTONE/stubs.c"
|
||||
@@ -0,0 +1,182 @@
|
||||
// donut.c by Andy Sloane (@a1k0n)
|
||||
// https://gist.github.com/a1k0n/8ea6516b4946ab36348fb61703dc3194
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
|
||||
#define WITH_RV32M
|
||||
|
||||
#define debug(...)
|
||||
//#define debug printf
|
||||
|
||||
// torus radii and distance from camera
|
||||
// these are pretty baked-in to other constants now, so it probably won't work
|
||||
// if you change them too much.
|
||||
const int dz = 5, r1 = 1, r2 = 2;
|
||||
|
||||
// "Magic circle algorithm"? DDA? I've seen this formulation in a few places;
|
||||
// first in Hal Chamberlain's Musical Applications of Microprocessors, but not
|
||||
// sure what to call it, or how to justify it theoretically. It seems to
|
||||
// correctly rotate around a point "near" the origin, without losing magnitude
|
||||
// over long periods of time, as long as there are enough bits of precision in x
|
||||
// and y. I use 14 bits here.
|
||||
#define R(s,x,y) x-=(y>>s); y+=(x>>s)
|
||||
|
||||
// CORDIC algorithm to find magnitude of |x,y| by rotating the x,y vector onto
|
||||
// the x axis. This also brings vector (x2,y2) along for the ride, and writes
|
||||
// back to x2 -- this is used to rotate the lighting vector from the normal of
|
||||
// the torus surface towards the camera, and thus determine the lighting amount.
|
||||
// We only need to keep one of the two lighting normal coordinates.
|
||||
/*
 * CORDIC vectoring: rotates (x,y) onto the x axis in 8 steps and
 * returns the resulting magnitude. The vector (*x2_, y2) undergoes the
 * same rotations; its rotated x component is written back to *x2_.
 * Both results are multiplied by 0.625 (1/2 + 1/8), a cheap stand-in
 * for the 0.607 CORDIC scale factor
 * (see https://en.wikipedia.org/wiki/CORDIC).
 */
int length_cordic(int16_t x, int16_t y, int16_t *x2_, int16_t y2) {
    int x2 = *x2_;

    /* Mirror into the right half-plane. */
    if (x < 0) {
        x = -x;
        x2 = -x2;
    }

    for (int i = 0; i < 8; i++) {
        /* Increments, all taken from the values at the start of the step. */
        const int step_x  = y  >> i;
        const int step_y  = x  >> i;
        const int step_x2 = y2 >> i;
        const int step_y2 = x2 >> i;

        if (y < 0) {
            x  -= step_x;
            y  += step_y;
            x2 -= step_x2;
            y2 += step_y2;
        } else {
            x  += step_x;
            y  -= step_y;
            x2 += step_x2;
            y2 -= step_y2;
        }
    }

    *x2_ = (x2 >> 1) + (x2 >> 3);
    return (x >> 1) + (x >> 3);
}
|
||||
|
||||
void main() {
|
||||
// high-precision rotation directions, sines and cosines and their products
|
||||
int16_t sB = 0, cB = 16384;
|
||||
int16_t sA = 11583, cA = 11583;
|
||||
int16_t sAsB = 0, cAsB = 0;
|
||||
int16_t sAcB = 11583, cAcB = 11583;
|
||||
|
||||
for (;;) {
|
||||
int x1_16 = cAcB << 2;
|
||||
|
||||
// yes this is a multiply but dz is 5 so it's (sb + (sb<<2)) >> 6 effectively
|
||||
int p0x = dz * sB >> 6;
|
||||
int p0y = dz * sAcB >> 6;
|
||||
int p0z = -dz * cAcB >> 6;
|
||||
|
||||
const int r1i = r1*256;
|
||||
const int r2i = r2*256;
|
||||
|
||||
int niters = 0;
|
||||
int nnormals = 0;
|
||||
int16_t yincC = (cA >> 6) + (cA >> 5); // 12*cA >> 8;
|
||||
int16_t yincS = (sA >> 6) + (sA >> 5); // 12*sA >> 8;
|
||||
int16_t xincX = (cB >> 7) + (cB >> 6); // 6*cB >> 8;
|
||||
int16_t xincY = (sAsB >> 7) + (sAsB >> 6); // 6*sAsB >> 8;
|
||||
int16_t xincZ = (cAsB >> 7) + (cAsB >> 6); // 6*cAsB >> 8;
|
||||
int16_t ycA = -((cA >> 1) + (cA >> 4)); // -12 * yinc1 = -9*cA >> 4;
|
||||
int16_t ysA = -((sA >> 1) + (sA >> 4)); // -12 * yinc2 = -9*sA >> 4;
|
||||
//int dmin = INT_MAX, dmax = -INT_MAX;
|
||||
for (int j = 0; j < 23; j++, ycA += yincC, ysA += yincS) {
|
||||
int xsAsB = (sAsB >> 4) - sAsB; // -40*xincY
|
||||
int xcAsB = (cAsB >> 4) - cAsB; // -40*xincZ;
|
||||
|
||||
int16_t vxi14 = (cB >> 4) - cB - sB; // -40*xincX - sB;
|
||||
int16_t vyi14 = ycA - xsAsB - sAcB;
|
||||
int16_t vzi14 = ysA + xcAsB + cAcB;
|
||||
|
||||
for (int i = 0; i < 79; i++, vxi14 += xincX, vyi14 -= xincY, vzi14 += xincZ) {
|
||||
int t = 512; // (256 * dz) - r2i - r1i;
|
||||
|
||||
int16_t px = p0x + (vxi14 >> 5); // assuming t = 512, t*vxi>>8 == vxi<<1
|
||||
int16_t py = p0y + (vyi14 >> 5);
|
||||
int16_t pz = p0z + (vzi14 >> 5);
|
||||
debug("pxyz (%+4d,%+4d,%+4d)\n", px, py, pz);
|
||||
int16_t lx0 = sB >> 2;
|
||||
int16_t ly0 = sAcB - cA >> 2;
|
||||
int16_t lz0 = -cAcB - sA >> 2;
|
||||
for (;;) {
|
||||
int t0, t1, t2, d;
|
||||
int16_t lx = lx0, ly = ly0, lz = lz0;
|
||||
debug("[%2d,%2d] (px, py) = (%d, %d), (lx, ly) = (%d, %d) -> ", j, i, px, py, lx, ly);
|
||||
t0 = length_cordic(px, py, &lx, ly);
|
||||
debug("t0=%d (lx', ly') = (%d, %d)\n", t0, lx, ly);
|
||||
t1 = t0 - r2i;
|
||||
t2 = length_cordic(pz, t1, &lz, lx);
|
||||
d = t2 - r1i;
|
||||
t += d;
|
||||
|
||||
if (t > 8*256) {
|
||||
putchar(' ');
|
||||
break;
|
||||
} else if (d < 2) {
|
||||
int N = lz >> 9;
|
||||
putchar(".,-~:;!*=#$@"[N > 0 ? N < 12 ? N : 11 : 0]);
|
||||
nnormals++;
|
||||
break;
|
||||
}
|
||||
// todo: shift and add version of this
|
||||
|
||||
|
||||
/*
|
||||
if (d < dmin) dmin = d;
|
||||
if (d > dmax) dmax = d;
|
||||
*/
|
||||
|
||||
#ifdef WITH_RV32M
|
||||
px += d*vxi14 >> 14;
|
||||
py += d*vyi14 >> 14;
|
||||
pz += d*vzi14 >> 14;
|
||||
#else
|
||||
{
|
||||
// 11x1.14 fixed point 3x parallel multiply
|
||||
// only 16 bit registers needed; starts from highest bit to lowest
|
||||
// d is about 2..1100, so 11 bits are sufficient
|
||||
int16_t dx = 0, dy = 0, dz = 0;
|
||||
int16_t a = vxi14, b = vyi14, c = vzi14;
|
||||
while (d) {
|
||||
if (d&1024) {
|
||||
dx += a;
|
||||
dy += b;
|
||||
dz += c;
|
||||
}
|
||||
d = (d&1023) << 1;
|
||||
a >>= 1;
|
||||
b >>= 1;
|
||||
c >>= 1;
|
||||
}
|
||||
// we already shifted down 10 bits, so get the last four
|
||||
px += dx >> 4;
|
||||
py += dy >> 4;
|
||||
pz += dz >> 4;
|
||||
}
|
||||
#endif
|
||||
niters++;
|
||||
}
|
||||
}
|
||||
puts("");
|
||||
}
|
||||
printf("%d iterations %d lit pixels\x1b[K", niters, nnormals);
|
||||
// fflush(stdout);
|
||||
|
||||
// rotate sines, cosines, and products thereof
|
||||
// this animates the torus rotation about two axes
|
||||
R(5, cA, sA);
|
||||
R(5, cAsB, sAsB);
|
||||
R(5, cAcB, sAcB);
|
||||
R(6, cB, sB);
|
||||
R(6, cAcB, cAsB);
|
||||
R(6, sAcB, sAsB);
|
||||
|
||||
// usleep(15000);
|
||||
printf("\r\x1b[23A");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,427 @@
|
||||
// donut.c by Andy Sloane (@a1k0n)
|
||||
// https://gist.github.com/a1k0n/8ea6516b4946ab36348fb61703dc3194
|
||||
// Bruno Levy: added ANSI "pseudo-graphics", and RISC-V statistics
|
||||
|
||||
#define CPU_NAME "TordBoyau ULX3S" // Name of your CPU and FPGA board
|
||||
#define MHZ 95 // Frequency (without a timer we cannot guess)
|
||||
#define USE_MUL // Define if you support RV32M
|
||||
|
||||
// #define PRECISE // Define for a more accurate result (but it costs a bit)
|
||||
#define START_FRAMES 20 // Number of frames without display
|
||||
// (for accurate CPI/MIPS measurements)
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
|
||||
// 0 15 31 47 63 79 96 112 127 143 159 175 191 207 223 240 255
|
||||
|
||||
const char* colormap[34] = {
|
||||
"0",
|
||||
"8;5;232",
|
||||
"8;5;233",
|
||||
"8;5;234",
|
||||
"8;5;235",
|
||||
"8;5;236",
|
||||
"8;5;237",
|
||||
"8;5;238",
|
||||
"8;5;239",
|
||||
"8;5;240",
|
||||
"8;5;241",
|
||||
"8;5;242",
|
||||
"8;5;243",
|
||||
"8;5;244",
|
||||
"8;5;245",
|
||||
"8;5;246",
|
||||
"8;5;247",
|
||||
"8;5;248",
|
||||
"8;5;249",
|
||||
"8;5;250",
|
||||
"8;5;251",
|
||||
"8;5;252",
|
||||
"8;5;253",
|
||||
"8;5;254",
|
||||
"8;5;255",
|
||||
"7",
|
||||
"8;5;16",
|
||||
"8;5;17",
|
||||
"8;5;18",
|
||||
"8;5;19",
|
||||
"8;5;20",
|
||||
"8;5;21",
|
||||
"8;5;22",
|
||||
"8;5;23",
|
||||
};
|
||||
|
||||
int prev_color1=0;
|
||||
int prev_color2=0;
|
||||
|
||||
char scanline[80];
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
/* Desktop stub: no cycle counter is read here, always returns 0. */
uint64_t my_rdcycle()
{
    const uint64_t no_counter = 0;
    return no_counter;
}
|
||||
|
||||
/* Desktop stub: no instruction counter is read here, always returns 0. */
uint64_t my_rdinstret()
{
    const uint64_t no_counter = 0;
    return no_counter;
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Returns the 64-bit cycle counter.
 * The counter is read as two 32-bit halves; the high half is read
 * before and after the low half, and the whole read is retried when
 * the two high values differ (the low half wrapped in between).
 */
uint64_t my_rdcycle() {
    uint32_t lo, hi, hi_check;
    do {
        __asm__ __volatile__ ("rdcycleh %0" : "=r" (hi));
        __asm__ __volatile__ ("rdcycle %0" : "=r" (lo));
        __asm__ __volatile__ ("rdcycleh %0" : "=r" (hi_check));
    } while(hi_check != hi);

    return ((uint64_t)hi << 32) | lo;
}
|
||||
|
||||
/*
 * Returns the 64-bit retired-instruction counter.
 * The counter is read as two 32-bit halves; the high half is read
 * before and after the low half, and the whole read is retried when
 * the two high values differ (the low half wrapped in between).
 */
uint64_t my_rdinstret() {
    uint32_t lo, hi, hi_check;
    do {
        __asm__ __volatile__ ("rdinstreth %0" : "=r" (hi));
        __asm__ __volatile__ ("rdinstret %0" : "=r" (lo));
        __asm__ __volatile__ ("rdinstreth %0" : "=r" (hi_check));
    } while(hi_check != hi);

    return ((uint64_t)hi << 32) | lo;
}
|
||||
|
||||
#endif
|
||||
|
||||
uint64_t stats_cycles_init = 0;
|
||||
uint64_t stats_instructions_init = 0;
|
||||
uint64_t stats_cycles = 0;
|
||||
uint64_t stats_instructions = 0;
|
||||
int stats_CPI_times_1000 = 0;
|
||||
|
||||
void stats_start() {
|
||||
stats_cycles_init = my_rdcycle();
|
||||
stats_instructions_init = my_rdinstret();
|
||||
}
|
||||
|
||||
void stats_end() {
|
||||
stats_cycles = my_rdcycle() - stats_cycles_init;
|
||||
stats_instructions = my_rdinstret() - stats_instructions_init;
|
||||
if(stats_cycles==0) {
|
||||
stats_cycles++;
|
||||
}
|
||||
if(stats_instructions==0) {
|
||||
stats_instructions++;
|
||||
}
|
||||
stats_CPI_times_1000 = (int)((stats_cycles * 1000)/stats_instructions);
|
||||
}
|
||||
|
||||
// Print "fixed point" number (integer/1000)
|
||||
/*
 * Prints kx/1000 with three decimals, e.g. 1234 -> "1.234".
 * The fractional part is zero-padded by hand; only plain "%d"
 * conversions are used.
 */
static void printk(uint64_t kx)
{
    int whole     = (int)(kx / 1000);
    int thousands = (int)(kx % 1000);

    printf("%d.",whole);
    /* one leading "0" per missing digit: below 100, then below 10 */
    for(int limit = 100; limit >= 10; limit /= 10) {
        if(thousands < limit) {
            printf("0");
        }
    }
    printf("%d",thousands);
}
|
||||
|
||||
static inline void setcolors(int fg, int bg) {
|
||||
printf("\033[4%s;3%sm",colormap[bg],colormap[fg]);
|
||||
}
|
||||
|
||||
static inline void setpixel(int x, int y, int color) {
|
||||
if(y&1){
|
||||
int color1 = scanline[x];
|
||||
int color2 = color;
|
||||
if(color1 == color2) {
|
||||
if(prev_color1 == color1) {
|
||||
putchar(' ');
|
||||
} else {
|
||||
printf("\033[4%sm ",colormap[color1]);
|
||||
prev_color1 = color1;
|
||||
}
|
||||
} else {
|
||||
if(prev_color1 != color1 && prev_color2 != color2) {
|
||||
printf("\033[4%s;3%sm",colormap[color1],colormap[color2]);
|
||||
prev_color1 = color1;
|
||||
prev_color2 = color2;
|
||||
} else if(prev_color1 != color1) {
|
||||
printf("\033[4%sm",colormap[color1]);
|
||||
prev_color1 = color1;
|
||||
} else if(prev_color2 != color2) {
|
||||
printf("\033[3%sm",colormap[color2]);
|
||||
prev_color2 = color2;
|
||||
}
|
||||
printf("\u2583");
|
||||
}
|
||||
} else {
|
||||
scanline[x] = color;
|
||||
}
|
||||
}
|
||||
|
||||
#define debug(...)
|
||||
//#define debug printf
|
||||
|
||||
// torus radii and distance from camera
|
||||
// these are pretty baked-in to other constants now, so it probably won't work
|
||||
// if you change them too much.
|
||||
const int dz = 5, r1 = 1, r2 = 2;
|
||||
|
||||
// "Magic circle algorithm"? DDA? I've seen this formulation in a few places;
|
||||
// first in Hal Chamberlain's Musical Applications of Microprocessors, but not
|
||||
// sure what to call it, or how to justify it theoretically. It seems to
|
||||
// correctly rotate around a point "near" the origin, without losing magnitude
|
||||
// over long periods of time, as long as there are enough bits of precision in x
|
||||
// and y. I use 14 bits here.
|
||||
#define R(s,x,y) x-=(y>>s); y+=(x>>s)
|
||||
|
||||
// CORDIC algorithm to find magnitude of |x,y| by rotating the x,y vector onto
|
||||
// the x axis. This also brings vector (x2,y2) along for the ride, and writes
|
||||
// back to x2 -- this is used to rotate the lighting vector from the normal of
|
||||
// the torus surface towards the camera, and thus determine the lighting amount.
|
||||
// We only need to keep one of the two lighting normal coordinates.
|
||||
/*
 * CORDIC vectoring: rotates (x,y) onto the positive x axis and returns the
 * resulting x, scaled back by ~0.625, i.e. an approximation of |(x,y)|.
 * A second vector (*x2_, y2) goes through exactly the same sequence of
 * rotations; only its x component is kept and written back through x2_.
 *
 *   x, y   vector whose length is wanted
 *   x2_    in/out: x component of the companion vector
 *   y2     y component of the companion vector (not returned)
 *
 * The number of iterations is NIT (10 if PRECISE is defined, 5 otherwise).
 */
int length_cordic(int16_t x, int16_t y, int16_t *x2_, int16_t y2) {

#ifdef PRECISE
#define NIT 10
#else
#define NIT 5
#endif

   int side = *x2_;

   // Mirror into the right half-plane first, so the rotations converge.
   if (x < 0) {
      x    = -x;
      side = -side;
   }

   for (int step = 0; step < NIT; ++step) {
      const int old_x    = x;
      const int old_side = side;
      if (y >= 0) {
         // above the axis: rotate clockwise
         x    += y >> step;
         y    -= old_x >> step;
         side += y2 >> step;
         y2   -= old_side >> step;
      } else {
         // below the axis: rotate counter-clockwise
         x    -= y >> step;
         y    += old_x >> step;
         side -= y2 >> step;
         y2   += old_side >> step;
      }
   }

   // Multiply by 0.625 (1/2 + 1/8) as a cheap approximation of the 0.607
   // scaling factor introduced by this algorithm
   // (see https://en.wikipedia.org/wiki/CORDIC)
   *x2_ = (side >> 1) + (side >> 3);

   int magnitude = (x >> 1) + (x >> 3);
#ifdef PRECISE
   magnitude -= (x >> 6); // get nearer to 0.607 [Inigo Quilez]
#endif
   return magnitude;
}
|
||||
|
||||
int main() {
|
||||
|
||||
printf( "\033[48;5;16m" // set background color black
|
||||
"\033[38;5;15m" // set foreground color white
|
||||
"\033[H" // home
|
||||
"\033[?25l" // hide cursor
|
||||
"\033[2J"); // clear screen
|
||||
|
||||
int frame = 0;
|
||||
|
||||
// high-precision rotation directions, sines and cosines and their products
|
||||
int16_t sB = 0, cB = 16384;
|
||||
int16_t sA = 11583, cA = 11583;
|
||||
int16_t sAsB = 0, cAsB = 0;
|
||||
int16_t sAcB = 11583, cAcB = 11583;
|
||||
|
||||
int accurate_CPI_x_1000;
|
||||
int accurate_MIPS_x_1000;
|
||||
int CPI_x_1000;
|
||||
|
||||
stats_start();
|
||||
|
||||
for (;;) {
|
||||
|
||||
int display_on = (frame > START_FRAMES);
|
||||
if(display_on) {
|
||||
stats_start();
|
||||
}
|
||||
|
||||
int x1_16 = cAcB << 2;
|
||||
|
||||
// yes this is a multiply but dz is 5 so it's (sb + (sb<<2)) >> 6 effectively
|
||||
int p0x = dz * sB >> 6;
|
||||
int p0y = dz * sAcB >> 6;
|
||||
int p0z = -dz * cAcB >> 6;
|
||||
|
||||
const int r1i = r1*256;
|
||||
const int r2i = r2*256;
|
||||
|
||||
int niters = 0;
|
||||
int nnormals = 0;
|
||||
int16_t yincC = (cA >> 6) + (cA >> 5); // 12*cA >> 8;
|
||||
int16_t yincS = (sA >> 6) + (sA >> 5); // 12*sA >> 8;
|
||||
int16_t xincX = (cB >> 7) + (cB >> 6); // 6*cB >> 8;
|
||||
int16_t xincY = (sAsB >> 7) + (sAsB >> 6); // 6*sAsB >> 8;
|
||||
int16_t xincZ = (cAsB >> 7) + (cAsB >> 6); // 6*cAsB >> 8;
|
||||
int16_t ycA = -((cA >> 1) + (cA >> 4)); // -12 * yinc1 = -9*cA >> 4;
|
||||
int16_t ysA = -((sA >> 1) + (sA >> 4)); // -12 * yinc2 = -9*sA >> 4;
|
||||
//int dmin = INT_MAX, dmax = -INT_MAX;
|
||||
|
||||
int xsAsB = (sAsB >> 4) - sAsB; // -40*xincY
|
||||
int xcAsB = (cAsB >> 4) - cAsB; // -40*xincZ;
|
||||
|
||||
|
||||
for (int j = 0; j < 46; j++, ycA += yincC>>1, ysA += yincS>>1) {
|
||||
|
||||
int16_t vxi14 = (cB >> 4) - cB - sB; // -40*xincX - sB;
|
||||
int16_t vyi14 = ycA - xsAsB - sAcB;
|
||||
int16_t vzi14 = ysA + xcAsB + cAcB;
|
||||
|
||||
for (int i = 0; i < 79; i++, vxi14 += xincX, vyi14 -= xincY, vzi14 += xincZ) {
|
||||
int t = 512; // (256 * dz) - r2i - r1i;
|
||||
|
||||
int16_t px = p0x + (vxi14 >> 5); // assuming t = 512, t*vxi>>8 == vxi<<1
|
||||
int16_t py = p0y + (vyi14 >> 5);
|
||||
int16_t pz = p0z + (vzi14 >> 5);
|
||||
debug("pxyz (%+4d,%+4d,%+4d)\n", px, py, pz);
|
||||
int16_t lx0 = sB >> 2;
|
||||
int16_t ly0 = sAcB - cA >> 2;
|
||||
int16_t lz0 = -cAcB - sA >> 2;
|
||||
for (;;) {
|
||||
int t0, t1, t2, d;
|
||||
int16_t lx = lx0, ly = ly0, lz = lz0;
|
||||
debug("[%2d,%2d] (px, py) = (%d, %d), (lx, ly) = (%d, %d) -> ", j, i, px, py, lx, ly);
|
||||
t0 = length_cordic(px, py, &lx, ly);
|
||||
debug("t0=%d (lx', ly') = (%d, %d)\n", t0, lx, ly);
|
||||
t1 = t0 - r2i;
|
||||
t2 = length_cordic(pz, t1, &lz, lx);
|
||||
d = t2 - r1i;
|
||||
t += d;
|
||||
|
||||
if (t > 8*256) {
|
||||
// putchar(' ');
|
||||
int N = (((j-frame)>>3)^(((i+frame)>>3)))&1;
|
||||
if(display_on) setpixel(i,j,(N<<2)+26);
|
||||
break;
|
||||
} else if (d < 2) {
|
||||
int N = lz >> 8;
|
||||
// putchar(".,-~:;!*=#$@"[N > 0 ? N < 12 ? N : 11 : 0]);
|
||||
N = N > 0 ? N < 26 ? N : 25 : 0;
|
||||
if(display_on) setpixel(i,j,N);
|
||||
nnormals++;
|
||||
break;
|
||||
}
|
||||
// todo: shift and add version of this
|
||||
|
||||
/*
|
||||
if (d < dmin) dmin = d;
|
||||
if (d > dmax) dmax = d;
|
||||
*/
|
||||
|
||||
#ifdef USE_MUL
|
||||
px += d*vxi14 >> 14;
|
||||
py += d*vyi14 >> 14;
|
||||
pz += d*vzi14 >> 14;
|
||||
#else
|
||||
{
|
||||
// 11x1.14 fixed point 3x parallel multiply
|
||||
// only 16 bit registers needed; starts from highest bit to lowest
|
||||
// d is about 2..1100, so 11 bits are sufficient
|
||||
int16_t dx = 0, dy = 0, dz = 0;
|
||||
int16_t a = vxi14, b = vyi14, c = vzi14;
|
||||
while (d) {
|
||||
if (d&1024) {
|
||||
dx += a;
|
||||
dy += b;
|
||||
dz += c;
|
||||
}
|
||||
d = (d&1023) << 1;
|
||||
a >>= 1;
|
||||
b >>= 1;
|
||||
c >>= 1;
|
||||
}
|
||||
// we already shifted down 10 bits, so get the last four
|
||||
px += dx >> 4;
|
||||
py += dy >> 4;
|
||||
pz += dz >> 4;
|
||||
}
|
||||
#endif
|
||||
niters++;
|
||||
}
|
||||
}
|
||||
if(display_on && (j&1)) puts("");
|
||||
}
|
||||
if(display_on) printf("\033[0m"); // reset colors
|
||||
|
||||
stats_end();
|
||||
|
||||
if(frame == START_FRAMES) {
|
||||
accurate_CPI_x_1000 = stats_CPI_times_1000;
|
||||
accurate_MIPS_x_1000 = (MHZ * 1000000) / accurate_CPI_x_1000;
|
||||
}
|
||||
|
||||
CPI_x_1000 = stats_CPI_times_1000;
|
||||
|
||||
uint64_t FPS_num = (uint64_t)(MHZ) * 1000000 * 1000;
|
||||
uint64_t FPS_denom = stats_cycles;
|
||||
int FPSx1000 = (int)(FPS_num / FPS_denom);
|
||||
|
||||
setcolors(25,33);
|
||||
#ifdef USE_MUL
|
||||
printf("%s RV32IM %dMHz ", CPU_NAME, MHZ);
|
||||
#else
|
||||
printf("%s RV32I %dMHz ", CPU_NAME, MHZ);
|
||||
#endif
|
||||
|
||||
setcolors(25,0);
|
||||
printf(" "); printk(FPSx1000); printf(" FPS ");
|
||||
setcolors(0,25);
|
||||
printf(" "); printk(CPI_x_1000);
|
||||
printf(" ("); printk(accurate_CPI_x_1000); printf(") CPI ");
|
||||
setcolors(25,0);
|
||||
printf(" "); printk(accurate_MIPS_x_1000); printf(" MIPS");
|
||||
/*
|
||||
setcolors(0,25);
|
||||
printf(" %d iterations ", niters);
|
||||
setcolors(0,25);
|
||||
printf(" %d lit pixels ", nnormals);
|
||||
*/
|
||||
setcolors(25,0);
|
||||
printf("\x1b[K");
|
||||
|
||||
#ifdef __linux__
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
// rotate sines, cosines, and products thereof
|
||||
// this animates the torus rotation about two axes
|
||||
R(5, cA, sA);
|
||||
R(5, cAsB, sAsB);
|
||||
R(5, cAcB, sAcB);
|
||||
R(6, cB, sB);
|
||||
R(6, cAcB, cAsB);
|
||||
R(6, sAcB, sAsB);
|
||||
|
||||
#ifdef __linux__
|
||||
usleep(15000);
|
||||
#endif
|
||||
printf("\r\x1b[23A");
|
||||
++frame;
|
||||
prev_color1=-1;
|
||||
prev_color2=-1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
|
||||
// Sometimes __errno is not linked, here is a dummy replacement.
|
||||
// Note that __errno is a function that returns a pointer to the
|
||||
// actual __errno (this is for multithreading). Made me bang my
|
||||
// head to the wall (and made tinyraytracer crash because powf()
|
||||
// was *calling* __errno).
|
||||
|
||||
/* Storage behind the dummy __errno(): one process-wide error slot. */
static int errno_fallback_value = 0;

/*
 * Fallback for the C library's __errno(): returns the address of a single
 * static int, so that library code calling __errno() gets a valid pointer.
 */
int* __errno() {
   return &errno_fallback_value;
}
|
||||
@@ -0,0 +1,27 @@
|
||||
# Hello world !
|
||||
|
||||
.section .text
|
||||
.globl main
|
||||
|
||||
# Entry point: prints the greeting forever.
main:
.Lmain_loop:
        la      a0, hello
        call    putstring
        j       .Lmain_loop

#-----------------------------------------------------------------------
# putstring: sends a NUL-terminated string, one character at a time,
#            through putchar.
# In:    a0 = address of the string
# Uses:  s0 (saved/restored) as the running pointer, so that it survives
#        the calls to putchar whatever temporaries putchar clobbers.
# Stack: 16 bytes, which keeps sp 16-byte aligned as the RISC-V calling
#        convention requires at every call.
#-----------------------------------------------------------------------
putstring:
        addi    sp, sp, -16
        sw      ra, 12(sp)              # needed: this function calls putchar
        sw      s0, 8(sp)               # callee-saved, we use it below
        mv      s0, a0                  # s0 = current character pointer
.Lputstring_next:
        lbu     a0, 0(s0)
        beqz    a0, .Lputstring_done    # stop on the terminating NUL
        call    putchar
        addi    s0, s0, 1
        j       .Lputstring_next
.Lputstring_done:
        lw      s0, 8(sp)
        lw      ra, 12(sp)
        addi    sp, sp, 16
        ret

        .section .data
hello:
        .asciz "Hello, world !\n"
|
||||
@@ -0,0 +1,113 @@
|
||||
// C version of humanshader
|
||||
// See https://humanshader.com/
|
||||
// (using a computer is clearly not as fun, but it is interesting to have
|
||||
// a small not too computationally expensive raytracing program that
|
||||
// can run on small softcores for FPGAs).
|
||||
// Using the 16-bits version with no divide from here: https://www.shadertoy.com/view/XflXDs
|
||||
|
||||
#define GL_width 71
|
||||
#define GL_height 40
|
||||
#include "GL_tty.h"
|
||||
|
||||
/*
 * Integer-only shader: computes the color of pixel (x,y) of the
 * "human shader" image (a sphere on a ground plane under a sky).
 *
 *   x, y    pixel coordinates; the sphere is centered on (36,18)
 *   r_out   receives the red component
 *   g_out   receives the green component (derived from red and blue)
 *   b_out   receives the blue component
 *
 * Red and blue are clamped to 255 from above before being stored.
 * Multiplications by 2 / 4 are written as multiplications rather than
 * left shifts wherever the operand can be negative, because shifting a
 * negative int to the left is undefined behavior in C.
 */
void human_shader(
   int x, int y, uint8_t* r_out, uint8_t* g_out, uint8_t* b_out
) {
   int R, B;

   //-------------------------
   // Section A (2 MUL, 3 ADD)
   //-------------------------
   int u = x-36;
   int v = 18-y;
   int u2 = u*u;
   int v2 = v*v;
   int h = u2 + v2;   // squared distance to the sphere center
   //-------------------------

   if( h < 200 )
   {
      //-------------------------------------
      // Section B, Sphere (4/7 MUL, 5/9 ADD)
      //-------------------------------------
      R = 420;
      B = 520;

      int t = 5200 + (h<<3);
      int p = (t*u)>>7;
      int q = (t*v)>>7;

      // bounce light
      int w = 18 + (((p*5-q*13))>>9);
      if( w>0 ) R += w*w;

      // sky light / ambient occlusion
      int o = q + 900;
      R = (R*o)>>12;
      B = (B*o)>>12;

      // sun/key light
      if( p > -q )
      {
         int w = (p+q)>>3;
         R += w;
         B += w;
      }
      //-------------------------
   }
   else if( v<0 )
   {
      //-------------------------------------
      // Section C, Ground (5/9 MUL, 6/9 ADD)
      //-------------------------------------
      R = 150 + 2*v;   // v is negative here: multiply, do not shift
      B = 50;

      int p = h + (v2<<3);
      int c = 240*(-v) - p;

      // sky light / ambient occlusion
      if( c>1200 )
      {
         int o = (25*c)>>3;
         o = (c*(7840-o)>>9) - 8560;
         R = (R*o)>>10;
         B = (B*o)>>10;
      }

      // sun/key light with soft shadow
      int r = c + u*v;
      int d = 3200 - h - 2*r;   // r may be negative: multiply, do not shift
      if( d>0 ) R += d;
      //-------------------------
   }
   else
   {
      //------------------------------
      // Section D, Sky (1 MUL, 2 ADD)
      //------------------------------
      int c = x + 4*y;
      R = 132 + c;
      B = 192 + c;
      //-------------------------
   }

   //-------------------------
   // Section E (3 MUL, 1 ADD)
   //-------------------------
   if(R > 255) R = 255;
   if(B > 255) B = 255;

   int G = (R*11 + 5*B)>>4;
   //-------------------------

   *r_out = (uint8_t)R;
   *g_out = (uint8_t)G;
   *b_out = (uint8_t)B;
}
|
||||
|
||||
// Entry point: renders one GL_width x GL_height image with human_shader.
int main() {
   GL_init();
   // human_shader is passed as the per-pixel callback
   // (presumably called once per pixel by GL_scan_RGB -- see GL_tty.h)
   GL_scan_RGB(GL_width, GL_height, human_shader);
   GL_terminate();
   return 0;
}
|
||||
@@ -0,0 +1,10 @@
|
||||
#include <stdint.h>
|
||||
|
||||
/* Base address of the memory-mapped IO page. */
#define IO_BASE      0x400000

/* Byte offsets of the device registers inside the IO page. */
#define IO_LEDS      4
#define IO_UART_DAT  8
#define IO_UART_CNTL 16

/*
 * 32-bit volatile access to a device register.
 * The argument and the whole expansion are parenthesized so that the macros
 * compose safely with any expression (e.g. IO_IN(a | b), IO_IN(p) & mask).
 */
#define IO_IN(port)       (*(volatile uint32_t*)(IO_BASE + (port)))
#define IO_OUT(port,val)  (*(volatile uint32_t*)(IO_BASE + (port)) = (val))
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
Computes and displays the Mandelbrot set on the terminal (ANSI color sequences).
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#include "io.h"
|
||||
#endif
|
||||
|
||||
/* Image size, in character cells. */
#define W 46
#define H 46

/*
 * Fixed-point setup: values are stored multiplied by mandel_mul.
 * Every expansion is parenthesized so that the constants can be used inside
 * larger expressions (e.g. n/dx) without precedence surprises.
 */
#define mandel_shift 10
#define mandel_mul (1 << mandel_shift)
#define xmin (-2*mandel_mul)
#define ymax ( 2*mandel_mul)
#define ymin (-2*mandel_mul)
#define xmax ( 2*mandel_mul)
#define dx ((xmax-xmin)/W)   /* horizontal step per column */
#define dy ((ymax-ymin)/H)   /* vertical step per row      */
#define norm_max (4 << mandel_shift)
|
||||
|
||||
|
||||
/*
 * Builds, at compile time, the ANSI sequence that selects the 24-bit
 * background color (R,G,B) followed by one space, i.e. one colored cell.
 */
#define ANSIRGB(R,G,B) "\033[48;2;" #R ";" #G ";" #B "m "

/* 21 cells: a 7-step blue ramp, then a green ramp, then a red ramp. */
const char* colormap[21] = {
   /* blue  */
   ANSIRGB(0,0,0),   ANSIRGB(0,0,40),  ANSIRGB(0,0,80),  ANSIRGB(0,0,120),
   ANSIRGB(0,0,160), ANSIRGB(0,0,200), ANSIRGB(0,0,240),
   /* green */
   ANSIRGB(0,0,0),   ANSIRGB(0,40,0),  ANSIRGB(0,80,0),  ANSIRGB(0,120,0),
   ANSIRGB(0,160,0), ANSIRGB(0,200,0), ANSIRGB(0,240,0),
   /* red   */
   ANSIRGB(0,0,0),   ANSIRGB(40,0,0),  ANSIRGB(80,0,0),  ANSIRGB(120,0,0),
   ANSIRGB(160,0,0), ANSIRGB(200,0,0), ANSIRGB(240,0,0)
};
|
||||
|
||||
int main() {
|
||||
int frame=0;
|
||||
for(;;) {
|
||||
IO_OUT(IO_LEDS,frame);
|
||||
int last_color = -1;
|
||||
printf("\033[H");
|
||||
int Ci = ymin;
|
||||
for(int Y=0; Y<H; ++Y) {
|
||||
int Cr = xmin;
|
||||
for(int X=0; X<W; ++X) {
|
||||
int Zr = Cr;
|
||||
int Zi = Ci;
|
||||
int iter = 20;
|
||||
while(iter > 0) {
|
||||
int Zrr = (Zr * Zr) >> mandel_shift;
|
||||
int Zii = (Zi * Zi) >> mandel_shift;
|
||||
int Zri = (Zr * Zi) >> (mandel_shift - 1);
|
||||
Zr = Zrr - Zii + Cr;
|
||||
Zi = Zri + Ci;
|
||||
if(Zrr + Zii > norm_max) {
|
||||
break;
|
||||
}
|
||||
--iter;
|
||||
}
|
||||
int color = (iter+frame)%21;
|
||||
printf(color == last_color ? " " : colormap[color]);
|
||||
last_color = color;
|
||||
Cr += dx;
|
||||
}
|
||||
Ci += dy;
|
||||
printf("\033[49m\n");
|
||||
last_color = -1;
|
||||
}
|
||||
++frame;
|
||||
#ifdef __linux__
|
||||
usleep(100000);
|
||||
#endif
|
||||
// if(frame>4) break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
# Computes and displays the Mandelbrot set on the terminal.
|
||||
# Needs NRV_IO_UART to be enabled.
|
||||
#
|
||||
# To access it, use:
|
||||
# miniterm.py --dtr=0 /dev/ttyUSB1 115200
|
||||
# or screen /dev/ttyUSB1 115200 (<ctrl> a \ to exit)
|
||||
|
||||
|
||||
# Base address of memory-mapped IO,
|
||||
# Loaded into gp at startup
|
||||
.equ IO_BASE, 0x400000

# IO-reg offsets. To read or write one of them,
# use IO_XXX(gp)
.equ IO_LEDS,      4
.equ IO_UART_DAT,  8
.equ IO_UART_CNTL, 16

# Fixed-point setup (values are stored multiplied by mandel_mul)
.equ mandel_shift, 10
.equ mandel_mul,   (1 << mandel_shift)
.equ mandel_size,  80                   # image is mandel_size x mandel_size characters
.equ mandel_iters, 9                    # max iterations; colormap needs mandel_iters+1 entries
.equ xmin, -2*mandel_mul
.equ xmax,  2*mandel_mul
.equ ymin, -2*mandel_mul
.equ ymax,  2*mandel_mul
.equ dx, (xmax-xmin)/mandel_size
.equ dy, (ymax-ymin)/mandel_size
.equ norm_max, (4 << mandel_shift)

.section .text

# Register roles (all callee-saved, so they survive the calls below):
#   X,Y          : s0,s1
#   Cr,Ci        : s2,s3
#   Zr,Zi        : s4,s5
#   Zrr,2Zri,Zii : s6,s7,s8
#   iter         : s10
#   mandel_size  : s11

.globl main

main:
mandelstart:

        # blink the LEDs: 5, 10, 5, 10, then off
        li      t0, 5
        sw      t0, IO_LEDS(gp)
        call    wait
        li      t0, 10
        sw      t0, IO_LEDS(gp)
        call    wait
        li      t0, 5
        sw      t0, IO_LEDS(gp)
        call    wait                    # without this wait the pattern is overwritten at once
        li      t0, 10
        sw      t0, IO_LEDS(gp)
        call    wait
        li      t0, 0
        sw      t0, IO_LEDS(gp)

        li      s1, 0                   # Y  <- 0
        li      s3, ymin                # Ci <- ymin
        li      s11, mandel_size

loop_y: li      s0, 0                   # X  <- 0
        li      s2, xmin                # Cr <- xmin

loop_x: mv      s4, s2                  # Z <- C
        mv      s5, s3

        li      s10, mandel_iters       # iter <- mandel_iters

loop_Z: mv      a0, s4                  # Zrr <- (Zr*Zr) >> mandel_shift
        mv      a1, s4
        call    __mulsi3
        srli    s6, a0, mandel_shift
        mv      a0, s4                  # 2Zri <- (Zr*Zi) >> (mandel_shift-1)
        mv      a1, s5
        call    __mulsi3
        srai    s7, a0, mandel_shift-1
        mv      a0, s5                  # Zii <- (Zi*Zi) >> mandel_shift
        mv      a1, s5
        call    __mulsi3
        srli    s8, a0, mandel_shift
        sub     s4, s6, s8              # Zr <- Zrr - Zii + Cr
        add     s4, s4, s2
        add     s5, s7, s3              # Zi <- 2Zri + Ci

        add     s6, s6, s8              # if norm > norm_max, exit loop
        li      s7, norm_max
        bgt     s6, s7, exit_Z

        addi    s10, s10, -1            # iter--, loop if non-zero
        bnez    s10, loop_Z

exit_Z:
        la      a0, colormap            # print colormap[iter]
        add     a0, a0, s10
        lbu     a0, 0(a0)
        call    putchar

        addi    s0, s0, 1               # next column
        addi    s2, s2, dx
        bne     s0, s11, loop_x

        li      a0, 13                  # end of row: CR LF
        call    putchar
        li      a0, 10
        call    putchar

        addi    s1, s1, 1               # next row
        addi    s3, s3, dy
        bne     s1, s11, loop_y

        li      t0, 15                  # frame done: all LEDs on
        sw      t0, IO_LEDS(gp)

        li      a0, 10                  # blank line between frames; a0 is set
        call    putchar                 # explicitly instead of reusing a stale value
        li      a0, 13
        call    putchar
        li      a0, 10
        call    putchar

        j       mandelstart

        .section .data
colormap:
        .ascii " .,:;ox%#@"
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Keeps GCC from recognizing the copy loops below as a memcpy pattern and
// replacing them with a call to memcpy, i.e. to this very function.
#pragma GCC optimize ("no-tree-loop-distribute-patterns")

/*
 * Copies len bytes from src to dst and returns dst.
 * When both pointers are 4-byte aligned the bulk of the data is moved one
 * 32-bit word at a time; whatever remains (or everything, if a pointer is
 * not aligned) is moved byte by byte.
 */
void* memcpy(void * dst, void const * src, size_t len) {
   uint32_t * plDst = (uint32_t *) dst;
   uint32_t const * plSrc = (uint32_t const *) src;

   // If source and destination are both aligned, copy 32 bits at a time.
   // uintptr_t (not uint32_t) so that the test is also well-formed where
   // pointers are wider than 32 bits.
   if (!((uintptr_t)src & 3) && !((uintptr_t)dst & 3)) {
      while (len >= 4) {
         *plDst++ = *plSrc++;
         len -= 4;
      }
   }

   // Copy the tail (or the whole buffer) one byte at a time.
   uint8_t* pcDst = (uint8_t *) plDst;
   uint8_t const* pcSrc = (uint8_t const *) plSrc;

   while (len--) {
      *pcDst++ = *pcSrc++;
   }

   return dst;
}
|
||||
@@ -0,0 +1,26 @@
|
||||
# https://blog.thea.codes/the-most-thoroughly-commented-linker-script/
|
||||
# https://interrupt.memfault.com/blog/how-to-write-linker-scripts-for-firmware
|
||||
|
||||
bin/riscv64-unknown-elf-as -march=rv32i -mabi=ilp32 -mno-relax mandelbrot_terminal.S -o mandelbrot_terminal.o
|
||||
riscv64-unknown-elf-ld mandelbrot_terminal.o -T baremetal.ld -m elf32lriscv -nostdlib -norelax
|
||||
/home/blevy/Programming/learn-fpga/FemtoRV/FIRMWARE/TOOLS/firmware_words a.elf -ram 6144 -hex a.hex
|
||||
|
||||
|
||||
FTDI 2232H
|
||||
|
||||
Rx >
|
||||
Tx <
|
||||
RTSn > Request to send
|
||||
CTSn < Clear to send
|
||||
DTRn > Data Terminal Ready
|
||||
DSRn < Data Set Ready
|
||||
DCDn > Data Carrier Detect
|
||||
|
||||
|
||||
#set_io DCDn 1
|
||||
#set_io DSRn 2
|
||||
#set_io DTRn 3
|
||||
#set_io CTSn 4
|
||||
#set_io RTSn 7
|
||||
set_io RS232_Tx_TTL 8
|
||||
set_io RS232_Rx_TTL 9
|
||||
@@ -0,0 +1,19 @@
|
||||
.section .text
|
||||
.globl rdcycle
|
||||
.globl rdinstret
|
||||
|
||||
#-----------------------------------------------------------------------
# uint64_t rdcycle()
# Reads the 64-bit cycle counter on a 32-bit core.
# Out:   a0 = low word, a1 = high word
# Clobb: t0
# The high word is read before and after the low word; if it changed in
# between (the low word wrapped), the read is retried.
#-----------------------------------------------------------------------
rdcycle:
.Lrdcycle_retry:
        rdcycleh a1                     # high word, first sample
        rdcycle  a0                     # low word
        rdcycleh t0                     # high word, second sample
        bne      a1, t0, .Lrdcycle_retry
        ret
|
||||
|
||||
#-----------------------------------------------------------------------
# uint64_t rdinstret()
# Reads the 64-bit retired-instruction counter on a 32-bit core.
# Out:   a0 = low word, a1 = high word
# Clobb: t0
# The high word is read before and after the low word; if it changed in
# between (the low word wrapped), the read is retried.
#-----------------------------------------------------------------------
rdinstret:
.Lrdinstret_retry:
        rdinstreth a1                   # high word, first sample
        rdinstret  a0                   # low word
        rdinstreth t0                   # high word, second sample
        bne        a1, t0, .Lrdinstret_retry
        ret
|
||||
@@ -0,0 +1,4 @@
|
||||
#include <stdint.h>
|
||||
|
||||
/* 64-bit cycle counter (implemented in assembly). */
extern uint64_t rdcycle(void);

/* 64-bit retired-instruction counter (implemented in assembly). */
extern uint64_t rdinstret(void);
|
||||
+186
@@ -0,0 +1,186 @@
|
||||
/*
|
||||
* Computation of the n'th decimal digit of \pi with very little memory.
|
||||
* Written by Fabrice Bellard on January 8, 1997.
|
||||
*
|
||||
* We use a slightly modified version of the method described by Simon
|
||||
* Plouffe in "On the Computation of the n'th decimal digit of various
|
||||
* transcendental numbers" (November 1996). We have modified the algorithm
|
||||
* to get a running time of O(n^2) instead of O(n^3log(n)^3).
|
||||
*
|
||||
* This program uses mostly integer arithmetic. It may be slow on some
|
||||
* hardwares where integer multiplications and divisons must be done
|
||||
* by software. We have supposed that 'int' has a size of 32 bits. If
|
||||
* your compiler supports 'long long' integers of 64 bits, you may use
|
||||
* the integer version of 'mul_mod' (see HAS_LONG_LONG).
|
||||
*/
|
||||
|
||||
/* Adapted to FemtoRV32 (Bruno Levy Feb. 2021) */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
// #include "errno_fix.h"
|
||||
|
||||
|
||||
//#define RV32_FASTCODE __attribute((section(".fastcode")))
|
||||
#define RV32_FASTCODE
|
||||
|
||||
/* uncomment the following line to use 'long long' integers */
|
||||
#define HAS_LONG_LONG
|
||||
|
||||
#ifdef HAS_LONG_LONG
|
||||
#define mul_mod(a,b,m) (( (long long) (a) * (long long) (b) ) % (m))
|
||||
#else
|
||||
#define mul_mod(a,b,m) fmod( (double) a * (double) b, m)
|
||||
#endif
|
||||
|
||||
/* return the inverse of x mod y */
|
||||
int inv_mod(int x, int y) RV32_FASTCODE;
|
||||
int inv_mod(int x, int y)
{
    /*
     * Extended Euclid on (x, y). Only the coefficient of x is tracked;
     * when the remainder reaches zero, the previous coefficient is the
     * inverse, possibly negative.
     */
    int rem = x, prev_rem = y;
    int coef = 1, prev_coef = 0;

    do {
        const int quot = prev_rem / rem;

        const int next_coef = prev_coef - quot * coef;
        prev_coef = coef;
        coef = next_coef;

        const int next_rem = prev_rem - quot * rem;
        prev_rem = rem;
        rem = next_rem;
    } while (rem != 0);

    /* bring the result into [0, y) */
    const int inv = prev_coef % y;
    return (inv < 0) ? (y + inv) : inv;
}
|
||||
|
||||
/* return (a^b) mod m */
|
||||
int pow_mod(int a, int b, int m) RV32_FASTCODE;
|
||||
int pow_mod(int a, int b, int m)
{
    /* square-and-multiply, consuming the exponent from its lowest bit */
    int result = 1;
    int base = a;

    for (;;) {
        if (b & 1)
            result = mul_mod(result, base, m);
        b >>= 1;
        if (b == 0)
            return result;
        base = mul_mod(base, base, m);
    }
}
|
||||
|
||||
/* return true if n is prime */
|
||||
int is_prime(int n) RV32_FASTCODE;
|
||||
int is_prime(int n)
{
    /* Returns 1 if n is prime, 0 otherwise (including for n < 2). */
    if (n < 2)
        return 0;
    if (n == 2)
        return 1;
    if ((n % 2) == 0)
        return 0;

    /*
     * Trial division by the odd numbers up to sqrt(n).
     * 46340 is the largest i whose square still fits in a 32-bit int, and
     * no int has a square root above it, so the bound keeps i*i from
     * overflowing without costing an extra division per iteration.
     */
    for (int i = 3; i <= 46340 && i * i <= n; i += 2)
        if ((n % i) == 0)
            return 0;
    return 1;
}
|
||||
|
||||
/* return the prime number immediatly after n */
|
||||
int next_prime(int n) RV32_FASTCODE;
|
||||
int next_prime(int n)
{
    /* first candidate is n+1; n itself is never returned */
    int candidate = n + 1;
    while (!is_prime(candidate))
        ++candidate;
    return candidate;
}
|
||||
|
||||
int digits(int n) RV32_FASTCODE;
|
||||
/*
 * Returns (int)(sum * 1e9), where sum is the fractional part accumulated
 * below -- per the file header, decimal digits of pi starting at position n
 * (main() calls this with n = 1, 10, 19, ... and prints the results
 * back to back).
 */
int digits(int n) {
    int av, a, vmax, N, num, den, k, kq, kq2, t, v, s, i;
    double sum;

    /* number of series terms, derived from n */
    N = (int) ((n + 20) * log(10) / log(2));

    sum = 0;

    /* one pass per prime a, for a = 3 and every following prime up to 2N */
    for (a = 3; a <= (2 * N); a = next_prime(a)) {

        /* av = a^vmax, the modulus used for this prime */
        vmax = (int) (log(2 * N) / log(a));
        av = 1;
        for (i = 0; i < vmax; i++)
            av = av * a;

        s = 0;
        num = 1;
        den = 1;
        v = 0;      /* running exponent of a, updated as factors are removed */
        kq = 1;     /* counter used to detect when k is a multiple of a */
        kq2 = 1;    /* same for 2k-1 */

        for (k = 1; k <= N; k++) {

            /* numerator: multiply by k with its factors of a removed */
            t = k;
            if (kq >= a) {
                do {
                    t = t / a;
                    v--;
                } while ((t % a) == 0);
                kq = 0;
            }
            kq++;
            num = mul_mod(num, t, av);

            /* denominator: multiply by 2k-1 with its factors of a removed */
            t = (2 * k - 1);
            if (kq2 >= a) {
                if (kq2 == a) {
                    do {
                        t = t / a;
                        v++;
                    } while ((t % a) == 0);
                }
                kq2 -= a;
            }
            den = mul_mod(den, t, av);
            kq2 += 2;

            /* the term only contributes modulo av while v is positive */
            if (v > 0) {
                t = inv_mod(den, av);
                t = mul_mod(t, num, av);
                t = mul_mod(t, k, av);
                for (i = v; i < vmax; i++)
                    t = mul_mod(t, a, av);
                s += t;
                if (s >= av)
                    s -= av;
            }

        }

        /* scale by 10^(n-1) mod av, then add s/av to the fractional sum */
        t = pow_mod(10, n - 1, av);
        s = mul_mod(s, t, av);

        sum = fmod(sum + (double) s / (double) av, 1.0);
    }
    return (int) (sum * 1e9);
}
|
||||
|
||||
|
||||
/* Prints "pi = 3." followed by the results of digits() for n = 1, 10, ..., 37. */
void main() {
   printf("\npi = 3.");
   /* five calls, n advancing by 9 each time */
   for (int n = 1; n <= 37; n += 9) {
      printf("%d",digits(n));
   }
}
|
||||
@@ -0,0 +1,29 @@
|
||||
/* Two separate 64kB regions: one for code, one for data. */
MEMORY {
   PROGROM (RX) : ORIGIN = 0x00000, LENGTH = 0x10000  /* 64kB ROM */
   DATARAM (RW) : ORIGIN = 0x10000, LENGTH = 0x10000  /* 64kB RAM */
}

SECTIONS {

    /* All code goes to PROGROM. */
    .text : {
        . = ALIGN(4);
        start_pipeline.o (.text)  /* listed first, so it is placed at the start of PROGROM */
        *(.text*)
    } > PROGROM

    /* Everything else (initialized data, constants, bss, ...) goes to DATARAM. */
    .data : {
        . = ALIGN(4);
        *(.data*)
        *(.sdata*)
        *(.rodata*)
        *(.srodata*)
        *(.bss*)
        *(.sbss*)

        *(COMMON)
        *(.eh_frame)
        *(.eh_frame_hdr)
        *(.init_array*)
        *(.gcc_except_table*)
    } > DATARAM
}
|
||||
@@ -0,0 +1,65 @@
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
/* Sends every character of the NUL-terminated string s to putchar(). */
void print_string(const char* s) {
   while (*s != '\0') {
      putchar(*s);
      s++;
   }
}
|
||||
|
||||
/* Prints s followed by a newline. Always returns 1. */
int puts(const char* s) {
   for (; *s; ++s) {
      putchar(*s);
   }
   putchar('\n');
   return 1;
}
|
||||
|
||||
/*
 * Prints val in decimal, with a leading '-' for negative values.
 * The magnitude is computed in unsigned arithmetic, so that the most
 * negative int (whose negation does not fit in an int) prints correctly.
 */
void print_dec(int val) {
   /* 3 digits per byte is always enough for the decimal expansion */
   char digits[3 * sizeof(int) + 1];
   int count = 0;
   unsigned int magnitude = (unsigned int)val;

   if (val < 0) {
      putchar('-');
      magnitude = 0u - magnitude;
   }

   /* produce the digits least-significant first (at least one, for 0) */
   do {
      digits[count++] = (char)('0' + (magnitude % 10));
      magnitude /= 10;
   } while (magnitude != 0);

   /* ... and send them most-significant first */
   while (count > 0) {
      putchar(digits[--count]);
   }
}
|
||||
|
||||
/*
 * Prints the nbdigits least significant hexadecimal digits of val, most
 * significant first, in upper case. Digit positions beyond the width of
 * val are printed as '0'.
 * Defined before print_hex() so that it is declared at its point of use.
 */
void print_hex_digits(unsigned int val, int nbdigits) {
   const int val_bits = (int)(8 * sizeof(val));
   for (int i = (4*nbdigits)-4; i >= 0; i -= 4) {
      /* shifting by the full width or more is undefined: treat as zero */
      unsigned int digit = (i < val_bits) ? ((val >> i) % 16) : 0;
      putchar("0123456789ABCDEF"[digit]);
   }
}

/* Prints val as 8 hexadecimal digits. */
void print_hex(unsigned int val) {
   print_hex_digits(val, 8);
}
|
||||
|
||||
/*
 * Minimal printf. Supported conversions:
 *   %s  string (a NULL pointer prints "(null)")
 *   %x  unsigned hexadecimal, 8 digits
 *   %d  signed decimal
 *   %c  character
 * Any other character after '%' is printed as is (so "%%" prints '%').
 * A lone '%' at the very end of the format prints '%'.
 * No flags, width or precision. Always returns 0.
 */
int printf(const char *fmt,...)
{
   va_list ap;
   va_start(ap, fmt);

   for(; *fmt; fmt++)
   {
      if(*fmt != '%')
      {
         putchar(*fmt);
         continue;
      }

      fmt++;
      if(*fmt == '\0')
      {
         /* format ends right after '%': stop here instead of stepping
            past the terminator */
         putchar('%');
         break;
      }

      switch(*fmt)
      {
         case 's':
         {
            const char* str = va_arg(ap,char *);
            print_string(str ? str : "(null)");
            break;
         }
         case 'x': print_hex((unsigned int)va_arg(ap,int)); break;
         case 'd': print_dec(va_arg(ap,int));               break;
         case 'c': putchar(va_arg(ap,int));                 break;
         default:  putchar(*fmt);                           break;
      }
   }

   va_end(ap);

   return 0;
}
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
# Base address of memory-mapped IO,
|
||||
# Loaded into gp at startup
|
||||
.equ IO_BASE, 0x400000
|
||||
|
||||
# IO-reg offsets. To read or write one of them,
|
||||
# use IO_XXX(gp)
|
||||
.equ IO_LEDS, 4
|
||||
.equ IO_UART_DAT, 8
|
||||
.equ IO_UART_CNTL, 16
|
||||
|
||||
.section .text
|
||||
.globl putchar
|
||||
|
||||
# Bit tested in IO_UART_CNTL after a write
# (presumably the UART "busy" flag -- confirm against the UART RTL).
.equ UART_CNTL_WAIT_MASK, 1<<9

#-----------------------------------------------------------------------
# putchar: writes a0 to the UART data register, then spins for as long
#          as the UART_CNTL_WAIT_MASK bit reads as set in the UART
#          control register.
# In:    a0 = character (left unchanged)
# Clobb: t0, t1
#-----------------------------------------------------------------------
putchar:
        sw      a0, IO_UART_DAT(gp)     # send the character
        li      t0, UART_CNTL_WAIT_MASK
.Lputchar_wait:
        lw      t1, IO_UART_CNTL(gp)
        and     t1, t1, t0
        bnez    t1, .Lputchar_wait      # bit still set: keep polling
        ret
|
||||
|
||||
@@ -0,0 +1,518 @@
|
||||
/* A port of Dmitry Sokolov's tiny raytracer to C and to FemtoRV32 */
|
||||
/* Displays on the small OLED display and/or HDMI */
|
||||
/* Bruno Levy, 2020 */
|
||||
/* Original tinyraytracer: https://github.com/ssloy/tinyraytracer */
|
||||
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "perf.h"
|
||||
#include "io.h"
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
/* Larger of x and y (y when they compare equal or unordered). */
static inline float max(float x, float y) {
   if (x > y) {
      return x;
   }
   return y;
}

/* Smaller of x and y (y when they compare equal or unordered). */
static inline float min(float x, float y) {
   if (x < y) {
      return x;
   }
   return y;
}
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
// If you want to adapt tinyraytracer to your own platform, there are
|
||||
// mostly two macros and two functions to write:
|
||||
// graphics_width
|
||||
// graphics_height
|
||||
// graphics_init()
|
||||
// graphics_set_pixel()
|
||||
//
|
||||
// You can also write the following functions (or leave them empty if
|
||||
// you do not need them):
|
||||
// graphics_terminate()
|
||||
// stats_begin_frame()
|
||||
// stats_begin_pixel()
|
||||
// stats_end_pixel()
|
||||
// stats_end_frame()
|
||||
|
||||
|
||||
// Size of the screen
|
||||
// Replace with your own variables or values
|
||||
|
||||
// Benchmark
|
||||
// - graphics deactivated (else UART waiting loop gives
|
||||
// different results according to CPU freq / UART baud rate
|
||||
// ratio).
|
||||
// - smaller image size (for faster run in simulation)
|
||||
|
||||
static int graphics_width = 120;
|
||||
static int graphics_height = 60;
|
||||
|
||||
static int bench_run=0;
|
||||
|
||||
// Two pixels per character using UTF8 character set
|
||||
// (comment-out if terminal does not support it)
|
||||
#define graphics_double_lines
|
||||
|
||||
// Replace with your own stuff to initialize graphics
|
||||
/* Prepares the terminal: white on black, cursor home, screen cleared. */
static inline void graphics_init() {
   printf("\033[48;5;16m");   // set background color black
   printf("\033[38;5;15m");   // set foreground color white
   printf("\033[H");          // home
   printf("\033[2J");         // clear screen
}
|
||||
|
||||
// Replace with your own stuff to terminate graphics or leave empty
|
||||
// Here I send <ctrl><D> to the UART, to exit the simulation in Verilator,
|
||||
// it is captured by special code in RTL/DEVICES/uart.v
|
||||
/* Restores white-on-black text colors once rendering is done. */
static inline void graphics_terminate() {
   printf("\033[48;5;16m");   // set background color black
   printf("\033[38;5;15m");   // set foreground color white
}
|
||||
|
||||
// Replace with your own code.
|
||||
void graphics_set_pixel(int x, int y, float r, float g, float b) {
|
||||
r = max(0.0f, min(1.0f, r));
|
||||
g = max(0.0f, min(1.0f, g));
|
||||
b = max(0.0f, min(1.0f, b));
|
||||
uint8_t R = (uint8_t)(255.0f * r);
|
||||
uint8_t G = (uint8_t)(255.0f * g);
|
||||
uint8_t B = (uint8_t)(255.0f * b);
|
||||
// graphics output deactivated for bench run
|
||||
if(bench_run) {
|
||||
if(y & 1) {
|
||||
if(x == graphics_width-1) {
|
||||
printf("%d",y/2);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
#ifdef graphics_double_lines
|
||||
static uint8_t prev_R=0;
|
||||
static uint8_t prev_G=0;
|
||||
static uint8_t prev_B=0;
|
||||
if(y&1) {
|
||||
if((R == prev_R) && (G == prev_G) && (B == prev_B)) {
|
||||
printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
|
||||
} else {
|
||||
printf("\033[48;2;%d;%d;%dm",(int)prev_R,(int)prev_G,(int)prev_B);
|
||||
printf("\033[38;2;%d;%d;%dm",(int)R,(int)G,(int)B);
|
||||
// https://www.w3.org/TR/xml-entity-names/025.html
|
||||
// https://onlineunicodetools.com/convert-unicode-to-utf8
|
||||
printf("\xE2\x96\x83");
|
||||
}
|
||||
if(x == graphics_width-1) {
|
||||
printf("\033[38;2;0;0;0m");
|
||||
printf("\033[48;2;0;0;0m\n");
|
||||
}
|
||||
} else {
|
||||
prev_R = R;
|
||||
prev_G = G;
|
||||
prev_B = B;
|
||||
}
|
||||
#else
|
||||
printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
|
||||
if(x == graphics_width-1) {
|
||||
printf("\033[48;2;0;0;0m\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Begins statistics collection for current pixel
|
||||
// Leave emtpy if not needed.
|
||||
// There are these two levels because on some
|
||||
// femtorv32 cores (quark, tachyon), the clock tick counter does not
|
||||
// have sufficient bits and will wrap during the time taken by
|
||||
// rendering a frame (up to several minutes).
|
||||
/* Per-pixel statistics hook, intentionally empty. Returns nothing. */
static inline void stats_begin_pixel(void) {
}
|
||||
|
||||
// Ends statistics collection for current pixel
|
||||
// Leave emtpy if not needed.
|
||||
/* Per-pixel statistics hook, intentionally empty. Returns nothing. */
static inline void stats_end_pixel(void) {
}
|
||||
|
||||
// Print "fixed point" number (integer/1000)
|
||||
/*
 * Prints kx/1000 with exactly three decimals: the integer part, a dot,
 * then the remainder left-padded with zeros.
 */
static void printk(uint64_t kx) {
   const int whole  = (int)(kx / 1000);
   const int thousandths = (int)(kx % 1000);

   printf("%d.",whole);
   // one "0" for each missing leading digit of the fractional part
   for(int threshold = 100; threshold >= 10; threshold /= 10) {
      if(thousandths < threshold) {
         printf("0");
      }
   }
   printf("%d",thousandths);
}
|
||||
|
||||
static uint64_t instret_start;
|
||||
static uint64_t cycles_start;
|
||||
|
||||
// Begins statistics collection for current frame.
|
||||
// Leave emtpy if not needed.
|
||||
static inline stats_begin_frame() {
|
||||
instret_start = rdinstret();
|
||||
cycles_start = rdcycle();
|
||||
}
|
||||
|
||||
// Ends statistics collection for current frame
|
||||
// and displays result.
|
||||
// Leave emtpy if not needed.
|
||||
static inline stats_end_frame() {
|
||||
graphics_terminate();
|
||||
uint64_t instret = rdinstret() - instret_start;
|
||||
uint64_t cycles = rdcycle() - cycles_start ;
|
||||
uint64_t kCPI = cycles*1000/instret;
|
||||
uint64_t pixels = graphics_width * graphics_height;
|
||||
uint64_t kRAYSTONES = (pixels*1000000000)/cycles;
|
||||
printf(
|
||||
"\n%dx%d %s ",
|
||||
graphics_width,graphics_height,
|
||||
bench_run ?
|
||||
"no gfx output (measurement is accurate)" :
|
||||
"gfx output (measurement is NOT accurate)"
|
||||
);
|
||||
printf("CPI="); printk(kCPI); printf(" ");
|
||||
printf("RAYSTONES="); printk(kRAYSTONES);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// Normally you will not need to modify anything beyond that point.
|
||||
/*******************************************************************/
|
||||
|
||||
typedef struct { float x,y,z; } vec3;
|
||||
typedef struct { float x,y,z,w; } vec4;
|
||||
|
||||
// Builds a vec3 from its three components.
static inline vec3 make_vec3(float x, float y, float z) {
    vec3 result = { x, y, z };
    return result;
}
|
||||
|
||||
// Builds a vec4 from its four components.
static inline vec4 make_vec4(float x, float y, float z, float w) {
    vec4 result = { x, y, z, w };
    return result;
}
|
||||
|
||||
// Returns -V (component-wise negation).
static inline vec3 vec3_neg(vec3 V) {
    vec3 negated = { -V.x, -V.y, -V.z };
    return negated;
}
|
||||
|
||||
// Returns U + V (component-wise sum).
static inline vec3 vec3_add(vec3 U, vec3 V) {
    vec3 sum = { U.x+V.x, U.y+V.y, U.z+V.z };
    return sum;
}
|
||||
|
||||
// Returns U - V (component-wise difference).
static inline vec3 vec3_sub(vec3 U, vec3 V) {
    vec3 diff = { U.x-V.x, U.y-V.y, U.z-V.z };
    return diff;
}
|
||||
|
||||
// Returns the dot product of U and V.
// The products are summed left to right, in x, y, z order.
static inline float vec3_dot(vec3 U, vec3 V) {
    return (U.x * V.x) + (U.y * V.y) + (U.z * V.z);
}
|
||||
|
||||
// Returns s * U (every component multiplied by the scalar s).
static inline vec3 vec3_scale(float s, vec3 U) {
    vec3 scaled = { s*U.x, s*U.y, s*U.z };
    return scaled;
}
|
||||
|
||||
// Returns the Euclidean length of U.
static inline float vec3_length(vec3 U) {
    float squared = U.x*U.x+U.y*U.y+U.z*U.z;
    return sqrtf(squared);
}
|
||||
|
||||
// Returns U scaled by 1/|U|. There is no guard against a zero-length U.
static inline vec3 vec3_normalize(vec3 U) {
    float inv_len = 1.0f/vec3_length(U);
    return vec3_scale(inv_len, U);
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
// A light source, described by a position and an intensity.
typedef struct Light {
|
||||
// Position of the light; cast_ray() derives the light direction and distance from it.
vec3 position;
|
||||
// Multiplies this light's diffuse and specular contributions in cast_ray().
float intensity;
|
||||
} Light;
|
||||
|
||||
// Builds a Light from its position and intensity.
Light make_Light(vec3 position, float intensity) {
    Light light = { .position = position, .intensity = intensity };
    return light;
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
// Surface properties used by cast_ray() to shade a hit point.
typedef struct {
|
||||
// Passed to refract() as eta_t when computing the refracted ray direction.
float refractive_index;
|
||||
// Weights of the four shading terms in cast_ray():
// x = diffuse, y = specular, z = reflected color, w = refracted color.
vec4 albedo;
|
||||
// Color that the diffuse term is multiplied with.
vec3 diffuse_color;
|
||||
// Exponent given to powf() for the specular highlight; no highlight is added when it is <= 0.
float specular_exponent;
|
||||
} Material;
|
||||
|
||||
// Builds a Material from refractive index r, albedo weights a,
// diffuse color and specular exponent spec.
Material make_Material(float r, vec4 a, vec3 color, float spec) {
    Material mat = {
        .refractive_index  = r,
        .albedo            = a,
        .diffuse_color     = color,
        .specular_exponent = spec
    };
    return mat;
}
|
||||
|
||||
Material make_Material_default() {
|
||||
Material M;
|
||||
M.refractive_index = 1;
|
||||
M.albedo = make_vec4(1,0,0,0);
|
||||
M.diffuse_color = make_vec3(0,0,0);
|
||||
M.specular_exponent = 0;
|
||||
return M;
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
// A sphere of the scene, with the material used to shade it.
typedef struct {
|
||||
// Center of the sphere.
vec3 center;
|
||||
// Radius of the sphere (squared in Sphere_ray_intersect()).
float radius;
|
||||
// Material copied out by scene_intersect() when this sphere is the nearest hit.
Material material;
|
||||
} Sphere;
|
||||
|
||||
// Builds a Sphere from center c, radius r and material M.
Sphere make_Sphere(vec3 c, float r, Material M) {
    Sphere sphere = { .center = c, .radius = r, .material = M };
    return sphere;
}
|
||||
|
||||
// Ray/sphere intersection test.
// Returns 1 and stores in *t0 the smallest non-negative ray parameter at
// which the ray orig + t*dir meets sphere S; returns 0 when there is none.
// *t0 is left untouched when the ray misses the sphere altogether, but it is
// overwritten when both intersections are behind the origin.
// NOTE(review): the formula presumably assumes that dir is normalized -- confirm with callers.
BOOL Sphere_ray_intersect(Sphere* S, vec3 orig, vec3 dir, float* t0) {
    vec3  to_center = vec3_sub(S->center, orig);
    float proj      = vec3_dot(to_center, dir);
    float dist2     = vec3_dot(to_center, to_center) - proj*proj;
    float radius2   = S->radius*S->radius;
    if (dist2 > radius2) {
        return 0;
    }

    float half_chord = sqrtf(radius2 - dist2);
    float t_near = proj - half_chord;
    float t_far  = proj + half_chord;

    // Prefer the near intersection, fall back to the far one.
    *t0 = (t_near < 0) ? t_far : t_near;
    return !(*t0 < 0);
}
|
||||
|
||||
// Returns I mirrored about normal N: I - 2*(I.N)*N.
vec3 reflect(vec3 I, vec3 N) {
    float twice_proj = 2.f*vec3_dot(I,N);
    return vec3_sub(I, vec3_scale(twice_proj, N));
}
|
||||
|
||||
// Refracted direction for incident direction I and normal N (Snell's law),
// going from a medium of index eta_i into a medium of index eta_t.
// In case of total reflection (k<0) there is no refracted ray; the
// arbitrary vector (1,0,0) is returned, which has no physical meaning.
vec3 refract(vec3 I, vec3 N, float eta_t, float eta_i /* =1.f */) {
    float clamped_dot = max(-1.f, min(1.f, vec3_dot(I,N)));
    float cosi = -clamped_dot;

    // Ray coming from inside the object: flip the normal and swap the media.
    if (cosi<0) {
        return refract(I, vec3_neg(N), eta_i, eta_t);
    }

    float eta = eta_i / eta_t;
    float k = 1 - eta*eta*(1 - cosi*cosi);
    if (k<0) {
        return make_vec3(1,0,0);
    }
    return vec3_add(vec3_scale(eta,I),vec3_scale((eta*cosi - sqrtf(k)),N));
}
|
||||
|
||||
// Finds the nearest intersection of the ray orig + t*dir with the scene
// (the spheres, then the checkerboard in the plane y = -4).
// On a hit, *hit and *N receive the hit point and its normal. A sphere hit
// copies the whole sphere material into *material; a checkerboard hit
// overwrites only material->diffuse_color.
// Returns non-zero when something is hit at a distance smaller than 1000.
BOOL scene_intersect(
    vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
    vec3* hit, vec3* N, Material* material
) {
    float nearest_sphere = 1e30;
    for(int s=0; s<nb_spheres; ++s) {
        float t;
        if(!Sphere_ray_intersect(&spheres[s], orig, dir, &t)) {
            continue;
        }
        if(!(t < nearest_sphere)) {
            continue;
        }
        nearest_sphere = t;
        *hit = vec3_add(orig,vec3_scale(t,dir));
        *N = vec3_normalize(vec3_sub(*hit, spheres[s].center));
        *material = spheres[s].material;
    }

    float nearest_board = 1e30;
    if (fabs(dir.y)>1e-3) {
        float d = -(orig.y+4)/dir.y; // the checkerboard plane has equation y = -4
        vec3 pt = vec3_add(orig, vec3_scale(d,dir));
        int on_board = fabs(pt.x)<10 && pt.z<-10 && pt.z>-30;
        if (d>0 && on_board && d<nearest_sphere) {
            nearest_board = d;
            *hit = pt;
            *N = make_vec3(0,1,0);
            // Alternate the two tile colors on the parity of the tile coordinates.
            int odd_tile = ((int)(.5*pt.x+1000) + (int)(.5*pt.z)) & 1;
            if (odd_tile) {
                material->diffuse_color = make_vec3(.3, .3, .3);
            } else {
                material->diffuse_color = make_vec3(.3, .2, .1);
            }
        }
    }
    return min(nearest_sphere, nearest_board)<1000;
}
|
||||
|
||||
vec3 cast_ray(
|
||||
vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
|
||||
Light* lights, int nb_lights, int depth /* =0 */
|
||||
) {
|
||||
vec3 point,N;
|
||||
Material material = make_Material_default();
|
||||
if (
|
||||
depth>2 ||
|
||||
!scene_intersect(orig, dir, spheres, nb_spheres, &point, &N, &material)
|
||||
) {
|
||||
float s = 0.5*(dir.y + 1.0);
|
||||
return vec3_add(
|
||||
vec3_scale(s,make_vec3(0.2, 0.7, 0.8)),
|
||||
vec3_scale(s,make_vec3(0.0, 0.0, 0.5))
|
||||
);
|
||||
}
|
||||
|
||||
vec3 reflect_dir=vec3_normalize(reflect(dir, N));
|
||||
vec3 refract_dir=vec3_normalize(refract(dir,N,material.refractive_index,1));
|
||||
|
||||
// offset the original point to avoid occlusion by the object itself
|
||||
vec3 reflect_orig =
|
||||
vec3_dot(reflect_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N));
|
||||
vec3 refract_orig =
|
||||
vec3_dot(refract_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N));
|
||||
vec3 reflect_color = cast_ray(
|
||||
reflect_orig, reflect_dir, spheres, nb_spheres,
|
||||
lights, nb_lights, depth + 1
|
||||
);
|
||||
vec3 refract_color = cast_ray(
|
||||
refract_orig, refract_dir, spheres, nb_spheres,
|
||||
lights, nb_lights, depth + 1
|
||||
);
|
||||
|
||||
float diffuse_light_intensity = 0, specular_light_intensity = 0;
|
||||
for (int i=0; i<nb_lights; i++) {
|
||||
vec3 light_dir = vec3_normalize(vec3_sub(lights[i].position,point));
|
||||
float light_distance = vec3_length(vec3_sub(lights[i].position,point));
|
||||
|
||||
vec3 shadow_orig =
|
||||
vec3_dot(light_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N)) ;
|
||||
// checking if the point lies in the shadow of the lights[i]
|
||||
vec3 shadow_pt, shadow_N;
|
||||
Material tmpmaterial;
|
||||
if (
|
||||
scene_intersect(
|
||||
shadow_orig, light_dir, spheres, nb_spheres,
|
||||
&shadow_pt, &shadow_N, &tmpmaterial
|
||||
) && (
|
||||
vec3_length(vec3_sub(shadow_pt,shadow_orig)) < light_distance
|
||||
)
|
||||
) continue ;
|
||||
|
||||
diffuse_light_intensity +=
|
||||
lights[i].intensity * max(0.f, vec3_dot(light_dir,N));
|
||||
|
||||
float abc = max(
|
||||
0.f, vec3_dot(vec3_neg(reflect(vec3_neg(light_dir), N)),dir)
|
||||
);
|
||||
float def = material.specular_exponent;
|
||||
if(abc > 0.0f && def > 0.0f) {
|
||||
specular_light_intensity += powf(abc,def)*lights[i].intensity;
|
||||
}
|
||||
}
|
||||
vec3 result = vec3_scale(
|
||||
diffuse_light_intensity * material.albedo.x, material.diffuse_color
|
||||
);
|
||||
result = vec3_add(
|
||||
result, vec3_scale(specular_light_intensity * material.albedo.y,
|
||||
make_vec3(1,1,1))
|
||||
);
|
||||
result = vec3_add(result, vec3_scale(material.albedo.z, reflect_color));
|
||||
result = vec3_add(result, vec3_scale(material.albedo.w, refract_color));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Renders pixel (i,j): shoots the primary ray through the pixel center from
// the origin (field of view pi/3), then sends the resulting color to
// graphics_set_pixel(). Wrapped by the per-pixel statistics hooks.
static inline void render_pixel(
    int i, int j, Sphere* spheres, int nb_spheres, Light* lights, int nb_lights
) {
    const float fov = M_PI/3.;
    stats_begin_pixel();

    float dir_x =  (i + 0.5) - graphics_width/2.;
    float dir_y = -(j + 0.5) + graphics_height/2.; // this flips the image.
    float dir_z = -graphics_height/(2.*tan(fov/2.));

    vec3 eye     = make_vec3(0,0,0);
    vec3 ray_dir = vec3_normalize(make_vec3(dir_x, dir_y, dir_z));
    vec3 C = cast_ray(eye, ray_dir, spheres, nb_spheres, lights, nb_lights, 0);

    graphics_set_pixel(i,j,C.x,C.y,C.z);
    stats_end_pixel();
}
|
||||
|
||||
// Renders one frame, pixel by pixel, between the per-frame statistics hooks.
// When graphics_double_lines is defined, rows are processed in pairs: for
// each column, the pixel of row j and then the pixel of row j+1 are rendered.
void render(Sphere* spheres, int nb_spheres, Light* lights, int nb_lights) {
#ifdef graphics_double_lines
    const int row_step = 2;
#else
    const int row_step = 1;
#endif
    stats_begin_frame();
    for (int row = 0; row<graphics_height; row+=row_step) {
        for (int col = 0; col<graphics_width; col++) {
            render_pixel(col,row  ,spheres,nb_spheres,lights,nb_lights);
#ifdef graphics_double_lines
            render_pixel(col,row+1,spheres,nb_spheres,lights,nb_lights);
#endif
        }
    }
    stats_end_frame();
}
|
||||
|
||||
int nb_spheres = 4;
|
||||
Sphere spheres[4];
|
||||
|
||||
int nb_lights = 3;
|
||||
Light lights[3];
|
||||
|
||||
void init_scene() {
|
||||
Material ivory = make_Material(
|
||||
1.0, make_vec4(0.6, 0.3, 0.1, 0.0), make_vec3(0.4, 0.4, 0.3), 50.
|
||||
);
|
||||
Material glass = make_Material(
|
||||
1.5, make_vec4(0.0, 0.5, 0.1, 0.8), make_vec3(0.6, 0.7, 0.8), 125.
|
||||
);
|
||||
Material red_rubber = make_Material(
|
||||
1.0, make_vec4(0.9, 0.1, 0.0, 0.0), make_vec3(0.3, 0.1, 0.1), 10.
|
||||
);
|
||||
Material mirror = make_Material(
|
||||
1.0, make_vec4(0.0, 10.0, 0.8, 0.0), make_vec3(1.0, 1.0, 1.0), 142.
|
||||
);
|
||||
|
||||
spheres[0] = make_Sphere(make_vec3(-3, 0, -16), 2, ivory);
|
||||
spheres[1] = make_Sphere(make_vec3(-1.0, -1.5, -12), 2, glass);
|
||||
spheres[2] = make_Sphere(make_vec3( 1.5, -0.5, -18), 3, red_rubber);
|
||||
spheres[3] = make_Sphere(make_vec3( 7, 5, -18), 4, mirror);
|
||||
|
||||
lights[0] = make_Light(make_vec3(-20, 20, 20), 1.5);
|
||||
lights[1] = make_Light(make_vec3( 30, 50, -25), 1.8);
|
||||
lights[2] = make_Light(make_vec3( 30, 20, 30), 1.7);
|
||||
}
|
||||
|
||||
int main() {
|
||||
init_scene();
|
||||
|
||||
graphics_init();
|
||||
IO_OUT(IO_LEDS,5);
|
||||
bench_run = 1;
|
||||
graphics_width = 40;
|
||||
graphics_height = 20;
|
||||
printf("Running without graphic output (for accurate measurement)...\n");
|
||||
render(spheres, nb_spheres, lights, nb_lights);
|
||||
IO_OUT(IO_LEDS,10);
|
||||
|
||||
bench_run = 0;
|
||||
graphics_width = 120;
|
||||
graphics_height = 60;
|
||||
render(spheres, nb_spheres, lights, nb_lights);
|
||||
IO_OUT(IO_LEDS,15);
|
||||
graphics_terminate();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
#include "io.h"
|
||||
|
||||
#define SPI_FLASH_BASE ((char*)(1 << 23))
|
||||
|
||||
int main() {
|
||||
for(int i=0; i<16; ++i) {
|
||||
IO_OUT(IO_LEDS,i);
|
||||
int lo = (int)SPI_FLASH_BASE[2*i ];
|
||||
int hi = (int)SPI_FLASH_BASE[2*i+1];
|
||||
print_hex_digits((hi << 8) | lo,4); // print four hexadecimal digits
|
||||
printf(" ");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* FEMTORV32 - FEMTOSOC demo program:
|
||||
* Displaying a rotating RISCV logo
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
|
||||
/* The RISCV logo, with a tiny resolution
|
||||
* (remember, I only got 4Kb of RAM
|
||||
* on the IceStick !)
|
||||
*/
|
||||
unsigned char logo[16][16] = {
|
||||
{7,7,7,7,7,7,5,3,3,3,3,3,3,3,3,7},
|
||||
{7,7,7,7,7,7,7,5,3,3,3,3,3,3,3,7},
|
||||
{1,1,1,1,2,7,7,7,3,3,3,3,3,3,3,7},
|
||||
{0,0,0,0,0,1,7,7,5,3,3,3,3,3,3,7},
|
||||
{0,0,0,0,0,0,7,7,6,3,3,3,3,3,6,7},
|
||||
{0,0,0,0,0,0,7,7,5,3,3,3,3,4,7,7},
|
||||
{0,0,0,0,0,2,7,7,4,3,3,3,3,7,7,7},
|
||||
{0,2,2,2,7,7,7,6,3,3,3,3,6,7,7,7},
|
||||
{0,7,7,7,7,7,6,3,3,3,3,5,7,7,2,7},
|
||||
{0,1,7,7,7,4,3,3,3,3,3,7,7,7,0,7},
|
||||
{0,0,2,7,7,6,3,3,3,3,6,7,7,1,0,7},
|
||||
{0,0,0,2,7,7,5,3,3,5,7,7,2,0,0,7},
|
||||
{0,0,0,0,7,7,7,5,4,7,7,2,0,0,0,7},
|
||||
{0,0,0,0,0,7,7,7,7,7,7,0,0,0,0,7},
|
||||
{0,0,0,0,0,1,7,7,7,7,1,0,0,0,0,7},
|
||||
{7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* ANSI color codes:
|
||||
* https://stackoverflow.com/questions/4842424/list-of-ansi-color-escape-sequences
|
||||
*/
|
||||
|
||||
|
||||
#define ANSIRGB(R,G,B) "\033[48;2;" #R ";" #G ";" #B "m "
|
||||
|
||||
#define ANSICOL(C) "\033[" #C "m "
|
||||
|
||||
|
||||
/*
|
||||
* The colormap.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This ones corresponds to the official RISC-V logo,
|
||||
* but uses more bandwidth (full RGB ANSI codes)
|
||||
*/
|
||||
/*
|
||||
const char* cmap[8] = {
|
||||
ANSIRGB(040,051,116),
|
||||
ANSIRGB(123,128,155),
|
||||
ANSIRGB(170,172,188),
|
||||
ANSIRGB(249,177,021),
|
||||
ANSIRGB(249,190,101),
|
||||
ANSIRGB(249,199,130),
|
||||
ANSIRGB(252,216,176),
|
||||
ANSIRGB(250,251,248)
|
||||
};
|
||||
*/
|
||||
|
||||
/* more compact colormap */
|
||||
const char* cmap[8] = {
|
||||
ANSICOL(44),
|
||||
ANSICOL(104),
|
||||
ANSICOL(47),
|
||||
ANSICOL(102),
|
||||
ANSICOL(103),
|
||||
ANSICOL(103),
|
||||
ANSICOL(103),
|
||||
ANSICOL(107)
|
||||
};
|
||||
|
||||
/*
|
||||
* Generated by TOOLS/make_sintab.c
|
||||
*/
|
||||
|
||||
int sintab[64] = {
|
||||
0,25,49,74,97,120,142,162,181,197,212,225,236,244,251,254,
|
||||
256,254,251,244,236,225,212,197,181,162,142,120,97,74,49,25,
|
||||
0,-25,-49,-74,-97,-120,-142,-162,-181,-197,-212,-225,-236,-244,
|
||||
-251,-254,-256,-254,-251,-244,-236,-225,-212,-197,-181,-162,
|
||||
-142,-120,-97,-74,-49,-25
|
||||
};
|
||||
|
||||
|
||||
#define GL_width 40
|
||||
#define GL_height 40
|
||||
|
||||
|
||||
void main() {
|
||||
|
||||
int frame = 0;
|
||||
int last_col = -1;
|
||||
for(;;) {
|
||||
printf("\033[H"); // reset cursor position
|
||||
|
||||
int scaling = (sintab[frame&63]+400) << 1;
|
||||
int Ux = scaling*sintab[frame & 63];
|
||||
int Uy = scaling*sintab[(frame + 16) & 63];
|
||||
int Vx = -Uy;
|
||||
int Vy = Ux;
|
||||
|
||||
int X0 = -(GL_width/2)*(Ux+Vx);
|
||||
int Y0 = -(GL_height/2)*(Uy+Vy);
|
||||
|
||||
for(int y=0; y<GL_height; ++y) {
|
||||
int X = X0;
|
||||
int Y = Y0;
|
||||
for(int x=0; x<GL_width; ++x) {
|
||||
unsigned char col = logo[(Y >> 18)&15][(X >> 18)&15];
|
||||
printf(col == last_col ? " " : cmap[col]);
|
||||
last_col = col;
|
||||
X += Ux;
|
||||
Y += Uy;
|
||||
}
|
||||
printf("\033[49m\n"); // reset color to black and newline
|
||||
last_col = -1;
|
||||
X0 += Vx;
|
||||
Y0 += Vy;
|
||||
}
|
||||
++frame;
|
||||
#ifdef __linux__
|
||||
usleep(20000);
|
||||
#endif
|
||||
// if(frame > 20) break;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
// Taken from picorv32
|
||||
//
|
||||
// This is free and unencumbered software released into the public domain.
|
||||
//
|
||||
// Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
// distribute this software, either in source code form or as a compiled
|
||||
// binary, for any purpose, commercial or non-commercial, and by any
|
||||
// means.
|
||||
|
||||
// A simple Sieve of Eratosthenes
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
// Note: if this is changed, then checksum need
|
||||
// to be updated as well.
|
||||
#define BITMAP_SIZE 64
|
||||
|
||||
typedef int bool;
|
||||
|
||||
static uint32_t bitmap[BITMAP_SIZE/32];
|
||||
|
||||
static uint32_t hash;
|
||||
|
||||
// One step of the XOR variant of the DJB2 hash: (a * 33) ^ b,
// computed modulo 2^32.
static uint32_t mkhash(uint32_t a, uint32_t b)
{
    uint32_t times33 = a * 33u;   // same value as (a << 5) + a
    return times33 ^ b;
}
|
||||
|
||||
static void bitmap_set(int idx)
|
||||
{
|
||||
bitmap[idx/32] |= 1 << (idx % 32);
|
||||
}
|
||||
|
||||
static bool bitmap_get(int idx)
|
||||
{
|
||||
return (bitmap[idx/32] & (1 << (idx % 32))) != 0;
|
||||
}
|
||||
|
||||
static void print_prime(int idx, int val)
|
||||
{
|
||||
if (idx < 10)
|
||||
printf(" ");
|
||||
printf("%d",idx);
|
||||
|
||||
if (idx / 10 == 1)
|
||||
goto force_th;
|
||||
switch (idx % 10) {
|
||||
case 1: printf("st"); break;
|
||||
case 2: printf("nd"); break;
|
||||
case 3: printf("rd"); break;
|
||||
force_th:
|
||||
default: printf("th"); break;
|
||||
}
|
||||
printf(" prime: %d\n",val);
|
||||
|
||||
hash = mkhash(hash, idx);
|
||||
hash = mkhash(hash, val);
|
||||
}
|
||||
|
||||
void sieve(void)
|
||||
{
|
||||
|
||||
int idx = 1;
|
||||
hash = 5381;
|
||||
print_prime(idx++, 2);
|
||||
for (int i = 0; i < BITMAP_SIZE; i++) {
|
||||
if (bitmap_get(i))
|
||||
continue;
|
||||
print_prime(idx++, 3+2*i);
|
||||
for (int j = 2*(3+2*i);; j += 3+2*i) {
|
||||
if (j%2 == 0)
|
||||
continue;
|
||||
int k = (j-3)/2;
|
||||
if (k >= BITMAP_SIZE)
|
||||
break;
|
||||
bitmap_set(k);
|
||||
}
|
||||
}
|
||||
|
||||
printf("checksum:\n %x",hash);
|
||||
|
||||
if (hash == 0x1772A48F) {
|
||||
printf(" OK\n");
|
||||
} else {
|
||||
printf(" ERROR\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the sieve forever, calling wait() ten times between two runs.
int main(void)
{
    while (1) {
        sieve();
        int pauses = 10;
        while (pauses-- > 0) {
            wait();
        }
    }

    return 0;
}
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
/* Single memory region: everything is linked into FLASH. */
MEMORY {
|
||||
FLASH (RX) : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB window in flash (the comment used to say "4 MB") */
|
||||
}
|
||||
/* One output section holding all the input sections, with the .text of start.o placed first. */
SECTIONS {
|
||||
everything : {
|
||||
. = ALIGN(4);
|
||||
start.o (.text)
|
||||
*(.*)
|
||||
} >FLASH
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
/* Linker script for programs stored in SPI flash */
|
||||
/* Inspired from picorv32/picosoc/sections.lds */
|
||||
/* */
|
||||
/* text and rodata sections are sent to flash */
|
||||
/* bss sections are sent to BRAM */
|
||||
/* data sections are sent to BRAM and have */
|
||||
/* initialization data in flash. */
|
||||
/* AT keyword specifies LMA (Load Memory Address) */
|
||||
|
||||
MEMORY {
   FLASH (rx)  : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB window in flash */
   RAM   (rwx) : ORIGIN = 0x00000000, LENGTH = 0x1800   /* 6 kB in RAM */
}

SECTIONS {

    /*
     * Initialized data.
     * The program executes knowing that the data is in RAM (VMA), but the
     * initial values are stored in FLASH at _sidata (LMA, set with AT).
     * It is one task of the startup code to copy the initial values
     * from FLASH to RAM.
     */
    .data : AT ( _sidata ) {
        . = ALIGN(4);
        _sdata = .;        /* start of .data in RAM; used by the startup code */
        _ram_start = .;    /* start of RAM (e.g., for garbage collector) */

        /* Initialized data */
        *(.data*)
        *(.sdata*)

        . = ALIGN(4);
        _edata = .;        /* end of .data in RAM; used by the startup code */
    } > RAM

    /* The program code and the constants go into FLASH */
    .text : {
        . = ALIGN(4);
        start_spiflash1.o(.text)  /* c runtime initialization (code) */
        *(.text*)                 /* .text* sections (code) */
        . = ALIGN(4);
        *(.rodata*)               /* .rodata* sections (constants, strings, etc.) */
        *(.srodata*)              /* .srodata* sections (small constants) */
        /*
         * Keep the load address of .data word-aligned: startup code that
         * copies .data one 32-bit word at a time would otherwise read
         * from an unaligned flash address.
         */
        . = ALIGN(4);
        _etext = .;               /* end of code and constants */
        _sidata = _etext;         /* load address of the .data initial values */
    } >FLASH

    /* Uninitialized data section */
    .bss : {
        . = ALIGN(4);
        _sbss = .;         /* start of bss; used by the startup code */
        *(.bss*)
        *(.sbss*)
        *(COMMON)
        . = ALIGN(4);
        _ebss = .;         /* end of bss; used by the startup code */
    } >RAM
}
|
||||
@@ -0,0 +1,80 @@
|
||||
/* Linker script for programs stored in SPI flash */
|
||||
/* Inspired from picorv32/picosoc/sections.lds */
|
||||
/* */
|
||||
/* text and rodata sections are sent to flash */
|
||||
/* bss sections are sent to BRAM */
|
||||
/* data sections are sent to BRAM and have */
|
||||
/* initialization data in flash. */
|
||||
/* AT keyword specifies LMA (Load Memory Address) */
|
||||
|
||||
MEMORY {
   FLASH (rx)  : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB window in flash */
   RAM   (rwx) : ORIGIN = 0x00000000, LENGTH = 0x1800   /* 6 kB in RAM */
}

SECTIONS {

    /*
     * Initialized data and fastcode section.
     * The program executes knowing that this section is in RAM (VMA), but
     * the initial contents are stored in FLASH at _sidata (LMA, set with AT).
     * It is one task of the startup code to copy the initial contents
     * from FLASH to RAM.
     */
    .data_and_fastcode : AT ( _sidata ) {
        . = ALIGN(4);
        _sdata = .;        /* start of the section in RAM; used by the startup code */
        _ram_start = .;    /* start of RAM (e.g., for garbage collector) */

        /* Initialized data */
        *(.data*)
        *(.sdata*)

        /* integer mul and div */
        */libgcc.a:muldi3.o(.text)
        */libgcc.a:div.o(.text)

        putchar.o(.text)
        print.o(.text)

        /* functions with attribute((section(".fastcode"))) */
        *(.fastcode*)

        . = ALIGN(4);
        _edata = .;        /* end of the section in RAM; used by the startup code */
    } > RAM

    /* The (non fastcode) program code and other data goes into FLASH */
    .text : {
        . = ALIGN(4);
        start_spiflash1.o(.text)  /* c runtime initialization (code) */

        /*
         * I do not understand why, but if I do not put this section, I got
         * an overlapping sections error with some programs (for instance pi.c
         * or C++ programs)
         */
        *(.eh_frame)
        *(.eh_frame_hdr)
        *(.init_array)
        *(.gcc_except_table*)

        *(.text*)                 /* .text* sections (code) */
        . = ALIGN(4);
        *(.rodata*)               /* .rodata* sections (constants, strings, etc.) */
        *(.srodata*)              /* .srodata* sections (small constants) */
        /*
         * Keep the load address of the RAM section word-aligned: startup
         * code that copies it one 32-bit word at a time would otherwise
         * read from an unaligned flash address.
         */
        . = ALIGN(4);
        _etext = .;               /* end of code and constants */
        _sidata = _etext;         /* load address of the RAM section's initial contents */
    } >FLASH

    /* Uninitialized data section */
    .bss : {
        . = ALIGN(4);
        _sbss = .;         /* start of bss; used by the startup code */
        *(.bss*)
        *(.sbss*)
        *(COMMON)
        . = ALIGN(4);
        _ebss = .;         /* end of bss; used by the startup code */
    } >RAM
}
|
||||
@@ -0,0 +1,87 @@
|
||||
/* Linker script for programs stored in SPI flash */
|
||||
/* Inspired from picorv32/picosoc/sections.lds */
|
||||
/* */
|
||||
/* text and rodata sections are sent to flash */
|
||||
/* bss sections are sent to BRAM */
|
||||
/* data sections are sent to BRAM and have */
|
||||
/* initialization data in flash. */
|
||||
/* AT keyword specifies LMA (Load Memory Address) */
|
||||
|
||||
MEMORY {
   FLASH (rx)  : ORIGIN = 0x00820000, LENGTH = 0x100000 /* 0x100000 bytes = 1 MB window in flash */
   RAM   (rwx) : ORIGIN = 0x00000000, LENGTH = 0x1800   /* 6 kB in RAM */
}

SECTIONS {

    /*
     * Initialized data and fastcode section.
     * The program executes knowing that this section is in RAM (VMA), but
     * the initial contents are stored in FLASH at _sidata (LMA, set with AT).
     * It is one task of the startup code to copy the initial contents
     * from FLASH to RAM.
     */
    .data_and_fastcode : AT ( _sidata ) {
        . = ALIGN(4);
        _sdata = .;        /* start of the section in RAM; used by the startup code */
        _ram_start = .;    /* start of RAM (e.g., for garbage collector) */

        /* Initialized data */
        *(.data*)
        *(.sdata*)

        /* integer mul and div */
        */libgcc.a:muldi3.o(.text)
        */libgcc.a:div.o(.text)

        /* putchar.o(.text) */

        /* functions with attribute((section(".fastcode"))) */
        *(.fastcode*)

        . = ALIGN(4);
        _edata = .;        /* end of the section in RAM; used by the startup code */
    } > RAM

    /* The (non fastcode) program code and other data goes into FLASH */
    .text : {
        . = ALIGN(4);
        start_spiflash1.o(.text)  /* c runtime initialization (code) */

        /*
         * I do not understand why, but if I do not put this section, I got
         * an overlapping sections error with some programs (for instance pi.c
         * or C++ programs)
         */
        *(.eh_frame)
        *(.eh_frame_hdr)
        *(.init_array*)
        *(.gcc_except_table*)

        *(.text*)                 /* .text* sections (code) */
        . = ALIGN(4);
        *(.rodata*)               /* .rodata* sections (constants, strings, etc.) */
        *(.srodata*)              /* .srodata* sections (small constants) */
        /*
         * Keep the load address of the RAM section word-aligned: startup
         * code that copies it one 32-bit word at a time would otherwise
         * read from an unaligned flash address.
         */
        . = ALIGN(4);
        _etext = .;               /* end of code and constants */
        _sidata = _etext;         /* load address of the RAM section's initial contents */
    } >FLASH

    /* Uninitialized data section */
    .bss : {
        . = ALIGN(4);
        _sbss = .;         /* start of bss; used by the startup code */
        *(.bss*)
        *(.sbss*)
        *(COMMON)
        . = ALIGN(4);
        _ebss = .;         /* end of bss; used by the startup code */
    } >RAM

    /*
     * Defines the start of the heap. Note that no minimum heap size is
     * reserved or checked here.
     */
    .heap : {
        . = ALIGN(4);
        _heap_start = .;   /* start of the heap */
        _end = .;          /* as expected by syscalls.c */
    } >RAM

}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Minimal RISC-V startup code: points gp to IO_BASE, initializes the
# stack pointer, calls main, then executes ebreak once main returns.
.equ IO_BASE,   0x400000        # value loaded into gp
.equ STACK_TOP, 0x1800          # initial stack pointer

.section .text
.globl start

start:
        li      gp, IO_BASE
        li      sp, STACK_TOP
        call    main
        ebreak
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
# Minimal RISC-V startup code: points gp to IO_BASE, initializes the
# stack pointer, calls main, then executes ebreak once main returns.
.equ IO_BASE,   0x400000        # value loaded into gp
.equ STACK_TOP, 0x20000         # initial stack pointer

.section .text
.globl start

start:
        li      gp, IO_BASE
        li      sp, STACK_TOP
        call    main
        ebreak
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
# C runtime startup (RISC-V): sets gp and sp, clears .bss, copies the
# initial values of .data from their load address to RAM, calls main, then
# executes ebreak once main returns.
# Register use: a0-a3 as scratch before main is called.
.equ IO_BASE,   0x400000        # value loaded into gp
.equ STACK_TOP, 0x1800          # initial stack pointer

.text
.global _start
.type _start, @function

_start:
.option push
.option norelax
        li      gp, IO_BASE
.option pop

        li      sp, STACK_TOP

# zero-init bss section:
# clears from _sbss to _ebss
# _sbss and _ebss are defined by linker script (spiflash.ld)
# Addresses are compared as unsigned values (bgeu/bltu): a signed compare
# would give the wrong answer for addresses at or above 0x80000000.
        la      a0, _sbss
        la      a1, _ebss
        bgeu    a0, a1, end_init_bss
loop_init_bss:
        sw      zero, 0(a0)
        addi    a0, a0, 4
        bltu    a0, a1, loop_init_bss
end_init_bss:

# copy data section from SPI Flash to BRAM:
# copies from _sidata (in flash) to _sdata ... _edata (in BRAM)
# _sidata, _sdata and _edata are defined by linker script (spiflash.ld)
# The copy proceeds one 32-bit word at a time.
        la      a0, _sidata             # a0 = source (load address)
        la      a1, _sdata              # a1 = destination
        la      a2, _edata              # a2 = end of destination
        bgeu    a1, a2, end_init_data
loop_init_data:
        lw      a3, 0(a0)
        sw      a3, 0(a1)
        addi    a0, a0, 4
        addi    a1, a1, 4
        bltu    a1, a2, loop_init_data
end_init_data:

        call    main
        ebreak
|
||||
@@ -0,0 +1,13 @@
|
||||
#include "perf.h"
|
||||
|
||||
// Prints the cycle and retired-instruction counters 100 times, then a final
// reading together with 100 * cycles / instret.
// All values are truncated to int for printing.
int main() {
    int iter = 0;
    while(iter < 100) {
        uint64_t cycles  = rdcycle();
        uint64_t instret = rdinstret();
        printf("i=%d    cycles=%d     instret=%d\n", iter, (int)cycles, (int)instret);
        ++iter;
    }

    // Final reading: instret is sampled before cycles here.
    uint64_t total_instret = rdinstret();
    uint64_t total_cycles  = rdcycle();
    int cpi_x100 = (int)(100*total_cycles/total_instret);
    printf("cycles=%d     instret=%d    100CPI=%d\n", (int)total_cycles, (int)total_instret, cpi_x100);

}
|
||||
@@ -0,0 +1,17 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define SPI_FLASH_BASE ((uint32_t*)(1 << 23))
|
||||
|
||||
int main() {
|
||||
for(;;) {
|
||||
for(int i=0; i<40; ++i) {
|
||||
uint32_t word = SPI_FLASH_BASE[i];
|
||||
char* c = (char*)&word;
|
||||
printf("%d 0x%x %c%c%c%c\n", i, word, c[0],c[1],c[2],c[3]);
|
||||
}
|
||||
printf("\n");
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,444 @@
|
||||
/* A port of Dmitry Sokolov's tiny raytracer to C and to FemtoRV32 */
|
||||
/* Displays on the small OLED display and/or HDMI */
|
||||
/* Bruno Levy, 2020 */
|
||||
/* Original tinyraytracer: https://github.com/ssloy/tinyraytracer */
|
||||
|
||||
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
// Returns the larger of x and y (y when the comparison x>y is false).
static inline float max(float x, float y) {
    if (x > y) {
        return x;
    }
    return y;
}
|
||||
// Returns the smaller of x and y (y when the comparison x<y is false).
static inline float min(float x, float y) {
    if (x < y) {
        return x;
    }
    return y;
}
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
// If you want to adapt tinyraytracer to your own platform, there are
|
||||
// mostly two macros and two functions to write:
|
||||
// graphics_width
|
||||
// graphics_height
|
||||
// graphics_init()
|
||||
// graphics_set_pixel()
|
||||
//
|
||||
// You can also write the following functions (or leave them empty if
|
||||
// you do not need them):
|
||||
// graphics_terminate()
|
||||
// stats_begin_frame()
|
||||
// stats_begin_pixel()
|
||||
// stats_end_pixel()
|
||||
// stats_end_frame()
|
||||
|
||||
|
||||
// Size of the screen
|
||||
// Replace with your own variables or values
|
||||
#define graphics_width 120
|
||||
#define graphics_height 60
|
||||
|
||||
// Two pixels per character using UTF8 character set
|
||||
// (comment-out if terminal does not support it)
|
||||
#define graphics_double_lines
|
||||
|
||||
// Replace with your own stuff to initialize graphics
|
||||
// Prepares the ANSI terminal: black background, cursor home, clear screen
// (all three escape sequences are sent by a single printf call).
static inline void graphics_init() {
    printf(
        "\033[48;5;16m"   // set background color black
        "\033[H"          // home
        "\033[2J"         // clear screen
    );
}
|
||||
|
||||
// Replace with your own stuff to terminate graphics or leave empty
|
||||
// For instance, sending <ctrl><D> to the UART exits the simulation in Verilator
|
||||
// (it is captured by special code in RTL/DEVICES/uart.v). This implementation does nothing.
|
||||
// Graphics shutdown hook; nothing to do in this implementation.
static inline void graphics_terminate() { }
|
||||
|
||||
|
||||
// Replace with your own code.
|
||||
void graphics_set_pixel(int x, int y, float r, float g, float b) {
|
||||
r = max(0.0f, min(1.0f, r));
|
||||
g = max(0.0f, min(1.0f, g));
|
||||
b = max(0.0f, min(1.0f, b));
|
||||
uint8_t R = (uint8_t)(255.0f * r);
|
||||
uint8_t G = (uint8_t)(255.0f * g);
|
||||
uint8_t B = (uint8_t)(255.0f * b);
|
||||
#ifdef graphics_double_lines
|
||||
static uint8_t prev_R=0;
|
||||
static uint8_t prev_G=0;
|
||||
static uint8_t prev_B=0;
|
||||
if(y&1) {
|
||||
if((R == prev_R) && (G == prev_G) && (B == prev_B)) {
|
||||
printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
|
||||
} else {
|
||||
printf("\033[48;2;%d;%d;%dm",(int)prev_R,(int)prev_G,(int)prev_B);
|
||||
printf("\033[38;2;%d;%d;%dm",(int)R,(int)G,(int)B);
|
||||
// https://www.w3.org/TR/xml-entity-names/025.html
|
||||
// https://onlineunicodetools.com/convert-unicode-to-utf8
|
||||
printf("\xE2\x96\x83");
|
||||
}
|
||||
if(x == graphics_width-1) {
|
||||
printf("\033[38;2;0;0;0m");
|
||||
printf("\033[48;2;0;0;0m\n");
|
||||
}
|
||||
} else {
|
||||
prev_R = R;
|
||||
prev_G = G;
|
||||
prev_B = B;
|
||||
}
|
||||
#else
|
||||
printf("\033[48;2;%d;%d;%dm ",(int)R,(int)G,(int)B);
|
||||
if(x == graphics_width-1) {
|
||||
printf("\033[48;2;0;0;0m\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Begins statistics collection for current frame.
// Leave empty if not needed.
// Returns nothing: declared void explicitly (implicit int is not valid C99).
static inline void stats_begin_frame() {
}
|
||||
|
||||
// Begins statistics collection for current pixel.
// Leave empty if not needed.
// There are these two levels (frame and pixel) because on some
// femtorv32 cores (quark, tachyon), the clock tick counter does not
// have sufficient bits and will wrap during the time taken by
// rendering a frame (up to several minutes).
// Returns nothing: declared void explicitly (implicit int is not valid C99).
static inline void stats_begin_pixel() {
}
|
||||
|
||||
// Ends statistics collection for current pixel.
// Leave empty if not needed.
// Returns nothing: declared void explicitly (implicit int is not valid C99).
static inline void stats_end_pixel() {
}
|
||||
|
||||
// Ends statistics collection for current frame
// and displays result.
// Leave empty if not needed.
// Returns nothing: declared void explicitly (implicit int is not valid C99).
static inline void stats_end_frame() {
}
|
||||
|
||||
// Normally you will not need to modify anything beyond that point.
|
||||
/*******************************************************************/
|
||||
|
||||
typedef struct { float x,y,z; } vec3;
|
||||
typedef struct { float x,y,z,w; } vec4;
|
||||
|
||||
static inline vec3 make_vec3(float x, float y, float z) {
|
||||
vec3 V;
|
||||
V.x = x; V.y = y; V.z = z;
|
||||
return V;
|
||||
}
|
||||
|
||||
static inline vec4 make_vec4(float x, float y, float z, float w) {
|
||||
vec4 V;
|
||||
V.x = x; V.y = y; V.z = z; V.w = w;
|
||||
return V;
|
||||
}
|
||||
|
||||
static inline vec3 vec3_neg(vec3 V) {
|
||||
return make_vec3(-V.x, -V.y, -V.z);
|
||||
}
|
||||
|
||||
static inline vec3 vec3_add(vec3 U, vec3 V) {
|
||||
return make_vec3(U.x+V.x, U.y+V.y, U.z+V.z);
|
||||
}
|
||||
|
||||
static inline vec3 vec3_sub(vec3 U, vec3 V) {
|
||||
return make_vec3(U.x-V.x, U.y-V.y, U.z-V.z);
|
||||
}
|
||||
|
||||
static inline float vec3_dot(vec3 U, vec3 V) {
|
||||
return U.x*V.x+U.y*V.y+U.z*V.z;
|
||||
}
|
||||
|
||||
static inline vec3 vec3_scale(float s, vec3 U) {
|
||||
return make_vec3(s*U.x, s*U.y, s*U.z);
|
||||
}
|
||||
|
||||
static inline float vec3_length(vec3 U) {
|
||||
return sqrtf(U.x*U.x+U.y*U.y+U.z*U.z);
|
||||
}
|
||||
|
||||
static inline vec3 vec3_normalize(vec3 U) {
|
||||
return vec3_scale(1.0f/vec3_length(U),U);
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
typedef struct Light {
|
||||
vec3 position;
|
||||
float intensity;
|
||||
} Light;
|
||||
|
||||
Light make_Light(vec3 position, float intensity) {
|
||||
Light L;
|
||||
L.position = position;
|
||||
L.intensity = intensity;
|
||||
return L;
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
typedef struct {
|
||||
float refractive_index;
|
||||
vec4 albedo;
|
||||
vec3 diffuse_color;
|
||||
float specular_exponent;
|
||||
} Material;
|
||||
|
||||
Material make_Material(float r, vec4 a, vec3 color, float spec) {
|
||||
Material M;
|
||||
M.refractive_index = r;
|
||||
M.albedo = a;
|
||||
M.diffuse_color = color;
|
||||
M.specular_exponent = spec;
|
||||
return M;
|
||||
}
|
||||
|
||||
Material make_Material_default() {
|
||||
Material M;
|
||||
M.refractive_index = 1;
|
||||
M.albedo = make_vec4(1,0,0,0);
|
||||
M.diffuse_color = make_vec3(0,0,0);
|
||||
M.specular_exponent = 0;
|
||||
return M;
|
||||
}
|
||||
|
||||
/*************************************************************************/
|
||||
|
||||
typedef struct {
|
||||
vec3 center;
|
||||
float radius;
|
||||
Material material;
|
||||
} Sphere;
|
||||
|
||||
Sphere make_Sphere(vec3 c, float r, Material M) {
|
||||
Sphere S;
|
||||
S.center = c;
|
||||
S.radius = r;
|
||||
S.material = M;
|
||||
return S;
|
||||
}
|
||||
|
||||
/*
 * Tests the ray (orig, dir) against sphere S.
 * Returns 1 and stores the hit distance along the ray in *t0 when the ray
 * hits the sphere at a non-negative distance, 0 otherwise. The nearer
 * intersection is preferred; the farther one is used when the nearer one
 * is behind the origin. *t0 is left untouched when the ray misses the
 * sphere entirely (first early return).
 * The formulas use dir as a unit-length direction — callers in this file
 * pass normalized vectors.
 */
BOOL Sphere_ray_intersect(Sphere* S, vec3 orig, vec3 dir, float* t0) {
    const vec3  to_center = vec3_sub(S->center, orig);
    const float tca = vec3_dot(to_center,dir);        /* projection of the center on the ray */
    const float d2  = vec3_dot(to_center,to_center) - tca*tca; /* squared ray-to-center distance */
    const float r2  = S->radius*S->radius;
    if (d2 > r2) {
        return 0;
    }
    const float thc = sqrtf(r2 - d2);                 /* half chord length */
    float t = tca - thc;                              /* near intersection */
    if (t < 0) {
        t = tca + thc;                                /* fall back to the far one */
    }
    *t0 = t;
    return (t < 0) ? 0 : 1;
}
|
||||
|
||||
vec3 reflect(vec3 I, vec3 N) {
|
||||
return vec3_sub(I, vec3_scale(2.f*vec3_dot(I,N),N));
|
||||
}
|
||||
|
||||
vec3 refract(vec3 I, vec3 N, float eta_t, float eta_i /* =1.f */) {
|
||||
// Snell's law
|
||||
float cosi = -max(-1.f, min(1.f, vec3_dot(I,N)));
|
||||
// if the ray comes from the inside the object, swap the air and the media
|
||||
if (cosi<0) return refract(I, vec3_neg(N), eta_i, eta_t);
|
||||
float eta = eta_i / eta_t;
|
||||
float k = 1 - eta*eta*(1 - cosi*cosi);
|
||||
// k<0 = total reflection, no ray to refract.
|
||||
// I refract it anyways, this has no physical meaning
|
||||
return k<0 ? make_vec3(1,0,0)
|
||||
: vec3_add(vec3_scale(eta,I),vec3_scale((eta*cosi - sqrtf(k)),N));
|
||||
}
|
||||
|
||||
BOOL scene_intersect(
|
||||
vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
|
||||
vec3* hit, vec3* N, Material* material
|
||||
) {
|
||||
float spheres_dist = 1e30;
|
||||
for(int i=0; i<nb_spheres; ++i) {
|
||||
float dist_i;
|
||||
if(
|
||||
Sphere_ray_intersect(&spheres[i], orig, dir, &dist_i) &&
|
||||
(dist_i < spheres_dist)
|
||||
) {
|
||||
spheres_dist = dist_i;
|
||||
*hit = vec3_add(orig,vec3_scale(dist_i,dir));
|
||||
*N = vec3_normalize(vec3_sub(*hit, spheres[i].center));
|
||||
*material = spheres[i].material;
|
||||
}
|
||||
}
|
||||
float checkerboard_dist = 1e30;
|
||||
if (fabs(dir.y)>1e-3) {
|
||||
float d = -(orig.y+4)/dir.y; // the checkerboard plane has equation y = -4
|
||||
vec3 pt = vec3_add(orig, vec3_scale(d,dir));
|
||||
if (d>0 && fabs(pt.x)<10 && pt.z<-10 && pt.z>-30 && d<spheres_dist) {
|
||||
checkerboard_dist = d;
|
||||
*hit = pt;
|
||||
*N = make_vec3(0,1,0);
|
||||
material->diffuse_color =
|
||||
(((int)(.5*hit->x+1000) + (int)(.5*hit->z)) & 1)
|
||||
? make_vec3(.3, .3, .3)
|
||||
: make_vec3(.3, .2, .1);
|
||||
}
|
||||
}
|
||||
return min(spheres_dist, checkerboard_dist)<1000;
|
||||
}
|
||||
|
||||
vec3 cast_ray(
|
||||
vec3 orig, vec3 dir, Sphere* spheres, int nb_spheres,
|
||||
Light* lights, int nb_lights, int depth /* =0 */
|
||||
) {
|
||||
vec3 point,N;
|
||||
Material material = make_Material_default();
|
||||
if (
|
||||
depth>2 ||
|
||||
!scene_intersect(orig, dir, spheres, nb_spheres, &point, &N, &material)
|
||||
) {
|
||||
float s = 0.5*(dir.y + 1.0);
|
||||
return vec3_add(
|
||||
vec3_scale(s,make_vec3(0.2, 0.7, 0.8)),
|
||||
vec3_scale(s,make_vec3(0.0, 0.0, 0.5))
|
||||
);
|
||||
}
|
||||
|
||||
vec3 reflect_dir=vec3_normalize(reflect(dir, N));
|
||||
vec3 refract_dir=vec3_normalize(refract(dir,N,material.refractive_index,1));
|
||||
|
||||
// offset the original point to avoid occlusion by the object itself
|
||||
vec3 reflect_orig =
|
||||
vec3_dot(reflect_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N));
|
||||
vec3 refract_orig =
|
||||
vec3_dot(refract_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N));
|
||||
vec3 reflect_color = cast_ray(
|
||||
reflect_orig, reflect_dir, spheres, nb_spheres,
|
||||
lights, nb_lights, depth + 1
|
||||
);
|
||||
vec3 refract_color = cast_ray(
|
||||
refract_orig, refract_dir, spheres, nb_spheres,
|
||||
lights, nb_lights, depth + 1
|
||||
);
|
||||
|
||||
float diffuse_light_intensity = 0, specular_light_intensity = 0;
|
||||
for (int i=0; i<nb_lights; i++) {
|
||||
vec3 light_dir = vec3_normalize(vec3_sub(lights[i].position,point));
|
||||
float light_distance = vec3_length(vec3_sub(lights[i].position,point));
|
||||
|
||||
vec3 shadow_orig =
|
||||
vec3_dot(light_dir,N) < 0
|
||||
? vec3_sub(point,vec3_scale(1e-3,N))
|
||||
: vec3_add(point,vec3_scale(1e-3,N)) ;
|
||||
// checking if the point lies in the shadow of the lights[i]
|
||||
vec3 shadow_pt, shadow_N;
|
||||
Material tmpmaterial;
|
||||
if (
|
||||
scene_intersect(
|
||||
shadow_orig, light_dir, spheres, nb_spheres,
|
||||
&shadow_pt, &shadow_N, &tmpmaterial
|
||||
) && (
|
||||
vec3_length(vec3_sub(shadow_pt,shadow_orig)) < light_distance
|
||||
)
|
||||
) continue ;
|
||||
|
||||
diffuse_light_intensity +=
|
||||
lights[i].intensity * max(0.f, vec3_dot(light_dir,N));
|
||||
|
||||
float abc = max(
|
||||
0.f, vec3_dot(vec3_neg(reflect(vec3_neg(light_dir), N)),dir)
|
||||
);
|
||||
float def = material.specular_exponent;
|
||||
if(abc > 0.0f && def > 0.0f) {
|
||||
specular_light_intensity += powf(abc,def)*lights[i].intensity;
|
||||
}
|
||||
}
|
||||
vec3 result = vec3_scale(
|
||||
diffuse_light_intensity * material.albedo.x, material.diffuse_color
|
||||
);
|
||||
result = vec3_add(
|
||||
result, vec3_scale(specular_light_intensity * material.albedo.y,
|
||||
make_vec3(1,1,1))
|
||||
);
|
||||
result = vec3_add(result, vec3_scale(material.albedo.z, reflect_color));
|
||||
result = vec3_add(result, vec3_scale(material.albedo.w, refract_color));
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline void render_pixel(
|
||||
int i, int j, Sphere* spheres, int nb_spheres, Light* lights, int nb_lights
|
||||
) {
|
||||
const float fov = M_PI/3.;
|
||||
stats_begin_pixel();
|
||||
float dir_x = (i + 0.5) - graphics_width/2.;
|
||||
float dir_y = -(j + 0.5) + graphics_height/2.; // this flips the image.
|
||||
float dir_z = -graphics_height/(2.*tan(fov/2.));
|
||||
vec3 C = cast_ray(
|
||||
make_vec3(0,0,0), vec3_normalize(make_vec3(dir_x, dir_y, dir_z)),
|
||||
spheres, nb_spheres, lights, nb_lights, 0
|
||||
);
|
||||
graphics_set_pixel(i,j,C.x,C.y,C.z);
|
||||
stats_end_pixel();
|
||||
}
|
||||
|
||||
void render(Sphere* spheres, int nb_spheres, Light* lights, int nb_lights) {
|
||||
stats_begin_frame();
|
||||
graphics_init();
|
||||
#ifdef graphics_double_lines
|
||||
for (int j = 0; j<graphics_height; j+=2) {
|
||||
for (int i = 0; i<graphics_width; i++) {
|
||||
render_pixel(i,j ,spheres,nb_spheres,lights,nb_lights);
|
||||
render_pixel(i,j+1,spheres,nb_spheres,lights,nb_lights);
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (int j = 0; j<graphics_height; j++) {
|
||||
for (int i = 0; i<graphics_width; i++) {
|
||||
render_pixel(i,j ,spheres,nb_spheres,lights,nb_lights);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
graphics_terminate();
|
||||
stats_end_frame();
|
||||
}
|
||||
|
||||
int nb_spheres = 4;
|
||||
Sphere spheres[4];
|
||||
|
||||
int nb_lights = 3;
|
||||
Light lights[3];
|
||||
|
||||
void init_scene() {
|
||||
Material ivory = make_Material(
|
||||
1.0, make_vec4(0.6, 0.3, 0.1, 0.0), make_vec3(0.4, 0.4, 0.3), 50.
|
||||
);
|
||||
Material glass = make_Material(
|
||||
1.5, make_vec4(0.0, 0.5, 0.1, 0.8), make_vec3(0.6, 0.7, 0.8), 125.
|
||||
);
|
||||
Material red_rubber = make_Material(
|
||||
1.0, make_vec4(0.9, 0.1, 0.0, 0.0), make_vec3(0.3, 0.1, 0.1), 10.
|
||||
);
|
||||
Material mirror = make_Material(
|
||||
1.0, make_vec4(0.0, 10.0, 0.8, 0.0), make_vec3(1.0, 1.0, 1.0), 142.
|
||||
);
|
||||
|
||||
spheres[0] = make_Sphere(make_vec3(-3, 0, -16), 2, ivory);
|
||||
spheres[1] = make_Sphere(make_vec3(-1.0, -1.5, -12), 2, glass);
|
||||
spheres[2] = make_Sphere(make_vec3( 1.5, -0.5, -18), 3, red_rubber);
|
||||
spheres[3] = make_Sphere(make_vec3( 7, 5, -18), 4, mirror);
|
||||
|
||||
lights[0] = make_Light(make_vec3(-20, 20, 20), 1.5);
|
||||
lights[1] = make_Light(make_vec3( 30, 50, -25), 1.8);
|
||||
lights[2] = make_Light(make_vec3( 30, 20, 30), 1.7);
|
||||
}
|
||||
|
||||
int main() {
|
||||
init_scene();
|
||||
render(spheres, nb_spheres, lights, nb_lights);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,173 @@
|
||||
#ifndef TTY_GRAPHICS_H
|
||||
#define TTY_GRAPHICS_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/**
|
||||
* \brief Resets default tty colors (white foreground, black background)
|
||||
* \details It is useful to call this function once all graphics are finished,
|
||||
* else text output might be invisible or difficult to see depending on
|
||||
* current foreground and background colors.
|
||||
*/
|
||||
static inline void tty_graphics_reset_colors() {
|
||||
printf("\033[48;5;16m" // set background color black
|
||||
"\033[38;5;15m" // set foreground color white
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Moves the cursor position to the origin (top left).
|
||||
*/
|
||||
static inline void tty_graphics_home() {
|
||||
printf("\033[H");
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Clears the terminal.
|
||||
*/
|
||||
static inline void tty_graphics_clear() {
|
||||
printf("\033[2J");
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Initializes "graphics mode".
|
||||
* \details resets default colors, clears the terminal and moves the
|
||||
* cursor to the top-left position.
|
||||
*/
|
||||
static inline void tty_graphics_init() {
|
||||
tty_graphics_reset_colors();
|
||||
tty_graphics_home();
|
||||
tty_graphics_clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Terminates "graphics mode".
|
||||
* \details Restores default foreground and background colors.
|
||||
*/
|
||||
static inline void tty_graphics_terminate() {
|
||||
tty_graphics_reset_colors();
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Moves the cursor to a specific location.
|
||||
*/
|
||||
static inline void tty_graphics_gotoXY(int x, int y) {
|
||||
printf("\033[%d;%dH",y,x);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Draws a "pixel" (a block) at the current
|
||||
* cursor position and advances the current cursor
|
||||
* position.
|
||||
*/
|
||||
static inline void tty_graphics_draw_one_pixel(
|
||||
uint8_t r, uint8_t g, uint8_t b
|
||||
) {
|
||||
printf("\033[48;2;%d;%d;%dm ",(int)r,(int)g,(int)b);
|
||||
}
|
||||
|
||||
/**
 * \brief Draws two "pixels" at the current
 *  cursor position and advances the current cursor
 *  position.
 * \param[in] r1 , g1 , b1 color of the upper pixel (background color)
 * \param[in] r2 , g2 , b2 color of the lower pixel (foreground color)
 * \details Characters are roughly twice as high as wide.
 *  To generate square pixels, this function draws two pixels in
 *  the same character, using the special lower-half white / upper-half
 *  black character, and setting the background and foreground colors.
 *  When both pixels have the same color, a single space on that
 *  background is emitted instead.
 */
static inline void tty_graphics_draw_two_pixels(
    uint8_t r1, uint8_t g1, uint8_t b1,
    uint8_t r2, uint8_t g2, uint8_t b2
) {
    if((r2 == r1) && (g2 == g1) && (b2 == b1)) {
        tty_graphics_draw_one_pixel(r1,g1,b1);
    } else {
        printf("\033[48;2;%d;%d;%dm",(int)r1,(int)g1,(int)b1);
        printf("\033[38;2;%d;%d;%dm",(int)r2,(int)g2,(int)b2);
        // https://www.w3.org/TR/xml-entity-names/025.html
        // https://onlineunicodetools.com/convert-unicode-to-utf8
        // https://copypastecharacter.com/
        // U+2584 LOWER HALF BLOCK in UTF-8. (U+2583, used previously, is
        // the lower three-eighths block, so the two pixels were not the
        // same height.)
        printf("\xE2\x96\x84");
    }
}
|
||||
|
||||
/**
|
||||
* \brief Moves the cursor position to the next line.
|
||||
* \details Background and foreground colors are set to black.
|
||||
*/
|
||||
static inline void tty_graphics_newline() {
|
||||
printf("\033[38;2;0;0;0m");
|
||||
printf("\033[48;2;0;0;0m\n");
|
||||
}
|
||||
|
||||
typedef void (*tty_graphics_pixelfunc)(int x, int y, uint8_t* r, uint8_t* g, uint8_t* b);
|
||||
typedef void (*tty_graphics_fpixelfunc)(int x, int y, float* r, float* g, float* b);
|
||||
|
||||
/**
|
||||
* \brief Draws an image by calling a user-specified function for each pixel.
|
||||
* \param[in] width , height dimension of the image in square pixels
|
||||
* \param[in] do_pixel the user function to be called for each pixel (a "shader"), that
|
||||
* determines the (integer) components r,g,b of the pixel's color.
|
||||
* \details Uses half-charater pixels.
|
||||
*/
|
||||
static inline void tty_graphics_scan(int width, int height, tty_graphics_pixelfunc do_pixel) {
|
||||
uint8_t r1, g1, b1;
|
||||
uint8_t r2, g2, b2;
|
||||
tty_graphics_home();
|
||||
for (int j = 0; j<height; j+=2) {
|
||||
for (int i = 0; i<width; i++) {
|
||||
do_pixel(i,j , &r1, &g1, &b1);
|
||||
do_pixel(i,j+1, &r2, &g2, &b2);
|
||||
tty_graphics_draw_two_pixels(r1,g1,b1,r2,g2,b2);
|
||||
if(i == width-1) {
|
||||
tty_graphics_newline();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Converts a floating point value to a byte.
 * \param[in] f the floating point value in [0,1]
 * \return the byte, in [0,255]
 * \details the input value is clamped to [0,1] and the scaled value is
 *   truncated (not rounded). NaN is mapped to 0, because converting NaN
 *   to an integer type is undefined behavior in C.
 */
static inline uint8_t tty_graphics_ftoi(float f) {
    // Written as !(f > 0) rather than (f <= 0) so that NaN, for which every
    // ordered comparison is false, also lands in this branch.
    if(!(f > 0.0f)) {
        return 0;
    }
    if(f > 1.0f) {
        f = 1.0f;
    }
    return (uint8_t)(255.0f * f);
}
|
||||
|
||||
/**
|
||||
* \brief Draws an image by calling a user-specified function for each pixel.
|
||||
* \param[in] width , height dimension of the image in square pixels
|
||||
* \param[in] do_pixel the user function to be called for each pixel (a "shader"), that
|
||||
* determines the (floating-point) components fr,fg,fb of the pixel's color.
|
||||
* \details Uses half-charater pixels.
|
||||
*/
|
||||
static inline void tty_graphics_fscan(int width, int height, tty_graphics_fpixelfunc do_pixel) {
|
||||
float fr1, fg1, fb1;
|
||||
float fr2, fg2, fb2;
|
||||
uint8_t r1, g1, b1;
|
||||
uint8_t r2, g2, b2;
|
||||
tty_graphics_home();
|
||||
for (int j = 0; j<height; j+=2) {
|
||||
for (int i = 0; i<width; i++) {
|
||||
do_pixel(i,j , &fr1, &fg1, &fb1);
|
||||
r1 = tty_graphics_ftoi(fr1);
|
||||
g1 = tty_graphics_ftoi(fg1);
|
||||
b1 = tty_graphics_ftoi(fb1);
|
||||
do_pixel(i,j+1, &fr2, &fg2, &fb2);
|
||||
r2 = tty_graphics_ftoi(fr2);
|
||||
g2 = tty_graphics_ftoi(fg2);
|
||||
b2 = tty_graphics_ftoi(fb2);
|
||||
tty_graphics_draw_two_pixels(r1,g1,b1,r2,g2,b2);
|
||||
if(i == width-1) {
|
||||
tty_graphics_newline();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,38 @@
|
||||
#include "tty_graphics.h"
|
||||
#include <math.h>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// Size of the screen
|
||||
// Replace with your own variables or values
|
||||
#define graphics_width 80
|
||||
#define graphics_height 40
|
||||
|
||||
int frame = 0;
|
||||
float f = 0.0;
|
||||
|
||||
void do_pixel(int i, int j, float* R, float* G, float* B) {
|
||||
float x = (float)i;
|
||||
float y = (float)j;
|
||||
*R = 0.5f*(sin(x*0.1+f)+1.0);
|
||||
*G = 0.5f*(sin(y*0.1+2.0*f)+1.0);
|
||||
*B = 0.5f*(sin((x+y)*0.05-3.0*f)+1.0);
|
||||
}
|
||||
|
||||
int main() {
|
||||
tty_graphics_init();
|
||||
for(;;) {
|
||||
tty_graphics_fscan(graphics_width, graphics_height, do_pixel);
|
||||
f += 0.1;
|
||||
++frame;
|
||||
tty_graphics_reset_colors();
|
||||
printf("frame = %d\n",frame);
|
||||
#ifdef __linux__
|
||||
usleep(40000);
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
#-----------------------------------------------------------------------
# void wait(void)
# Target: RISC-V, GNU assembler syntax
# In:     nothing
# Out:    nothing
# Clobb:  t0 (a temporary register in the standard RISC-V calling convention)
#
# Busy-wait delay: spins through (1 << WAIT_LOG2_ITERS) iterations of a
# decrement-and-branch loop, then returns. Uses no stack.
#-----------------------------------------------------------------------
        .equ    WAIT_LOG2_ITERS, 17

        .section .text
        .globl  wait

wait:
        li      t0, 1
        slli    t0, t0, WAIT_LOG2_ITERS     # t0 = iteration count (1 << 17)
.Lwait_spin:
        addi    t0, t0, -1
        bnez    t0, .Lwait_spin             # keep spinning until t0 reaches 0
        ret
|
||||
|
||||
+3694
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,7 @@
|
||||
## Toolchain
|
||||
- Yosys / Yosys NextPNR / Yosys Apicula
|
||||
|
||||
## ToDo
|
||||
|
||||
- Check the Yosys documentation!
|
||||
- RISC-V ISA SET Manual
|
||||
+3533
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,30 @@
|
||||
|
||||
// Simulation testbench: instantiates the SOC, generates a free-running
// clock (toggling every time unit) and prints the LED value each time it
// differs from the value seen at the previous clock toggle.
module bench();
   reg        CLK;           // Main clock

   // Level of RESET that keeps the design OUT of reset.
   // clockworks.v computes resetn = RESET ^ `INV_BTN, so the idle level is
   // the active-low de-asserted value (1), inverted when INV_BTN says so.
   // Leaving RESET undriven makes resetn X, which stalls every register
   // that is gated by it in simulation.
`ifdef INV_BTN
   localparam RESET_IDLE = 1'b1 ^ `INV_BTN;
`else
   localparam RESET_IDLE = 1'b1;
`endif
   reg        RESET = RESET_IDLE; // Reset input, held de-asserted
   wire [4:0] LEDS;          // LED outputs
   reg        RXD = 1'b1;    // UART receive (idle high)
   wire       TXD;           // UART transmit

   // Device Under Test
   SOC uut(
      .clk(CLK),
      .rst_i(RESET),
      .led(LEDS),
      .RXD(RXD),
      .TXD(TXD)
   );

   reg [4:0] prev_LEDS = 0;  // LED value sampled at the previous toggle
   initial begin
      CLK = 0;
      forever begin
         #1 CLK = ~CLK;
         if(LEDS != prev_LEDS) begin
            $display("LEDS = %b",LEDS);
         end
         prev_LEDS <= LEDS;
      end
   end
endmodule
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Bruno Levy
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
`default_nettype none
|
||||
|
||||
module Clockworks (
|
||||
input wire CLK,
|
||||
input wire RESET,
|
||||
output wire clk,
|
||||
output wire resetn
|
||||
);
|
||||
parameter SLOW = 0;
|
||||
|
||||
assign resetn = RESET ^ `INV_BTN;
|
||||
generate
|
||||
if (SLOW != 0) begin
|
||||
localparam slow_bits = SLOW;
|
||||
|
||||
reg [SLOW:0] slow_CLK = 0;
|
||||
always @(posedge CLK) begin
|
||||
slow_CLK <= slow_CLK + 1;
|
||||
end
|
||||
assign clk = slow_CLK[slow_bits];
|
||||
end else begin
|
||||
assign clk = CLK;
|
||||
end
|
||||
endgenerate
|
||||
endmodule
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Step 1: Blinker
|
||||
* DONE
|
||||
*/
|
||||
|
||||
`default_nettype none
|
||||
|
||||
module SOC (
|
||||
input clk, // system clock
|
||||
input rst_i, // reset button
|
||||
output [3:0] led, // system LEDs
|
||||
input RXD, // UART receive
|
||||
output TXD // UART transmit
|
||||
);
|
||||
|
||||
|
||||
// A blinker that counts on 4 bits, wired to the 4 LEDs
|
||||
reg [3:0] count = 0;
|
||||
always @(posedge clk) begin
|
||||
count <= count + 1;
|
||||
end
|
||||
assign led = count;
|
||||
assign TXD = 1'b0; // not used for now
|
||||
endmodule
|
||||
@@ -0,0 +1,40 @@
|
||||
/**
|
||||
* Step 2: Blinker (slower version)
|
||||
* DONE*
|
||||
*/
|
||||
|
||||
`default_nettype none
|
||||
`include "clockworks.v"
|
||||
|
||||
module SOC (
|
||||
input clk, // system clock
|
||||
input rst_i, // reset button
|
||||
output [4:0] led, // system LEDs
|
||||
input RXD, // UART receive
|
||||
output TXD // UART transmit
|
||||
);
|
||||
|
||||
wire clkI; // internal clock
|
||||
wire resetn; // internal reset signal, goes low on reset
|
||||
|
||||
// A blinker that counts on 5 bits, wired to the 5 LEDs
|
||||
reg [4:0] count = 0;
|
||||
always @(posedge clkI) begin
|
||||
count <= !resetn ? 0 : count + 1;
|
||||
end
|
||||
|
||||
// Clock gearbox (to let you see what happens)
|
||||
// and reset circuitry (to workaround an
|
||||
// initialization problem with Ice40)
|
||||
Clockworks #(
|
||||
.SLOW(21) // Divide clock frequency by 2^21
|
||||
)CW(
|
||||
.CLK(clk),
|
||||
.RESET(rst_i),
|
||||
.clk(clkI),
|
||||
.resetn(resetn)
|
||||
);
|
||||
|
||||
assign led = count;
|
||||
assign TXD = 1'b0; // not used for now
|
||||
endmodule
|
||||
@@ -0,0 +1,59 @@
|
||||
`include "clockworks.v"
|
||||
|
||||
module SOC (
|
||||
input clk,
|
||||
input rst_i,
|
||||
output [4:0] led,
|
||||
output TXD,
|
||||
input RXD
|
||||
);
|
||||
|
||||
wire clkI, resetn;
|
||||
|
||||
reg [4:0] PC = 0;
|
||||
reg [4:0] MEM [0:20];
|
||||
|
||||
initial begin
|
||||
MEM[0] = 5'b00000;
|
||||
MEM[1] = 5'b00001;
|
||||
MEM[2] = 5'b00010;
|
||||
MEM[3] = 5'b00100;
|
||||
MEM[4] = 5'b01000;
|
||||
MEM[5] = 5'b10000;
|
||||
MEM[6] = 5'b10001;
|
||||
MEM[7] = 5'b10010;
|
||||
MEM[8] = 5'b10100;
|
||||
MEM[9] = 5'b11000;
|
||||
MEM[10] = 5'b11001;
|
||||
MEM[11] = 5'b11010;
|
||||
MEM[12] = 5'b11100;
|
||||
MEM[13] = 5'b11101;
|
||||
MEM[14] = 5'b11110;
|
||||
MEM[15] = 5'b11111;
|
||||
MEM[16] = 5'b11110;
|
||||
MEM[17] = 5'b11100;
|
||||
MEM[18] = 5'b11000;
|
||||
MEM[19] = 5'b10000;
|
||||
MEM[20] = 5'b00000;
|
||||
end
|
||||
|
||||
reg [4:0] leds = 0;
|
||||
assign led = leds;
|
||||
|
||||
always @(posedge clkI) begin
|
||||
leds <= MEM[PC];
|
||||
PC <= (!resetn || PC == 20) ? 0 : (PC + 1);
|
||||
end
|
||||
|
||||
Clockworks #(
|
||||
.SLOW(21)
|
||||
)clkw(
|
||||
.CLK(clk),
|
||||
.RESET(rst_i),
|
||||
.clk(clkI),
|
||||
.resetn(resetn)
|
||||
);
|
||||
|
||||
assign TXD = 1'b0;
|
||||
|
||||
endmodule
|
||||
@@ -0,0 +1,141 @@
|
||||
/**
|
||||
* Step 4: Creating a RISC-V processor
|
||||
* The instruction decoder
|
||||
* central LED blinks, other LEDs show instr type.
|
||||
* DONE*
|
||||
*/
|
||||
|
||||
`default_nettype none
|
||||
`include "clockworks.v"
|
||||
|
||||
module SOC (
|
||||
input clk, // system clock
|
||||
input rst_i, // reset button
|
||||
output [4:0] led, // system LEDs
|
||||
input RXD, // UART receive
|
||||
output TXD // UART transmit
|
||||
);
|
||||
|
||||
wire clk_i; // internal clock
|
||||
wire resetn; // internal reset signal, goes low on reset
|
||||
|
||||
reg [31:0] MEM [0:255];
|
||||
reg [31:0] PC; // program counter
|
||||
reg [31:0] instr; // current instruction
|
||||
|
||||
initial begin
|
||||
PC = 0;
|
||||
// add x0, x0, x0
|
||||
// rs2 rs1 add rd ALUREG
|
||||
instr = 32'b0000000_00000_00000_000_00000_0110011;
|
||||
// add x1, x0, x0
|
||||
// rs2 rs1 add rd ALUREG
|
||||
MEM[0] = 32'b0000000_00000_00000_000_00001_0110011;
|
||||
// addi x1, x1, 1
|
||||
// imm rs1 add rd ALUIMM
|
||||
MEM[1] = 32'b000000000001_00001_000_00001_0010011;
|
||||
// addi x1, x1, 1
|
||||
// imm rs1 add rd ALUIMM
|
||||
MEM[2] = 32'b000000000001_00001_000_00001_0010011;
|
||||
// addi x1, x1, 1
|
||||
// imm rs1 add rd ALUIMM
|
||||
MEM[3] = 32'b000000000001_00001_000_00001_0010011;
|
||||
// addi x1, x1, 1
|
||||
// imm rs1 add rd ALUIMM
|
||||
MEM[4] = 32'b000000000001_00001_000_00001_0010011;
|
||||
// lw x2,0(x1)
|
||||
// imm rs1 w rd LOAD
|
||||
MEM[5] = 32'b000000000000_00001_010_00010_0000011;
|
||||
// sw x2,0(x1)
|
||||
// imm rs2 rs1 w imm STORE
|
||||
MEM[6] = 32'b000000_00010_00001_010_00000_0100011;
|
||||
|
||||
// ebreak
|
||||
// SYSTEM
|
||||
MEM[7] = 32'b000000000001_00000_000_00000_1110011;
|
||||
|
||||
end
|
||||
|
||||
|
||||
// See the table P. 105 in RISC-V manual
|
||||
|
||||
// The 10 RISC-V instructions
|
||||
wire isALUreg = (instr[6:0] == 7'b0110011); // rd <- rs1 OP rs2
|
||||
wire isALUimm = (instr[6:0] == 7'b0010011); // rd <- rs1 OP Iimm
|
||||
wire isBranch = (instr[6:0] == 7'b1100011); // if(rs1 OP rs2) PC<-PC+Bimm
|
||||
wire isJALR = (instr[6:0] == 7'b1100111); // rd <- PC+4; PC<-rs1+Iimm
|
||||
wire isJAL = (instr[6:0] == 7'b1101111); // rd <- PC+4; PC<-PC+Jimm
|
||||
wire isAUIPC = (instr[6:0] == 7'b0010111); // rd <- PC + Uimm
|
||||
wire isLUI = (instr[6:0] == 7'b0110111); // rd <- Uimm
|
||||
wire isLoad = (instr[6:0] == 7'b0000011); // rd <- mem[rs1+Iimm]
|
||||
wire isStore = (instr[6:0] == 7'b0100011); // mem[rs1+Simm] <- rs2
|
||||
wire isSYSTEM = (instr[6:0] == 7'b1110011); // special
|
||||
|
||||
// The 5 immediate formats
|
||||
wire [31:0] Uimm={ instr[31], instr[30:12], {12{1'b0}}};
|
||||
wire [31:0] Iimm={{21{instr[31]}}, instr[30:20]};
|
||||
wire [31:0] Simm={{21{instr[31]}}, instr[30:25],instr[11:7]};
|
||||
wire [31:0] Bimm={{20{instr[31]}}, instr[7],instr[30:25],instr[11:8],1'b0};
|
||||
wire [31:0] Jimm={{12{instr[31]}}, instr[19:12],instr[20],instr[30:21],1'b0};
|
||||
|
||||
// Source and destination registers
|
||||
wire [4:0] rs1Id = instr[19:15];
|
||||
wire [4:0] rs2Id = instr[24:20];
|
||||
wire [4:0] rdId = instr[11:7];
|
||||
|
||||
// function codes
|
||||
wire [2:0] funct3 = instr[14:12];
|
||||
wire [6:0] funct7 = instr[31:25];
|
||||
|
||||
always @(posedge clk_i) begin
|
||||
if(!resetn) begin
|
||||
PC <= 0;
|
||||
instr <= 32'b0000000_00000_00000_000_00000_0110011; // NOP
|
||||
end else if(!isSYSTEM) begin
|
||||
instr <= MEM[PC];
|
||||
PC <= PC+1;
|
||||
end
|
||||
`ifdef BENCH
|
||||
if(isSYSTEM) $finish();
|
||||
`endif
|
||||
end
|
||||
|
||||
assign led = isSYSTEM ? 31 : {PC[0],isALUreg,isALUimm,isStore,isLoad};
|
||||
|
||||
`ifdef BENCH
|
||||
always @(posedge clk_i) begin
|
||||
$display("PC=%0d",PC);
|
||||
case (1'b1)
|
||||
isALUreg: $display(
|
||||
"ALUreg rd=%d rs1=%d rs2=%d funct3=%b",
|
||||
rdId, rs1Id, rs2Id, funct3
|
||||
);
|
||||
isALUimm: $display(
|
||||
"ALUimm rd=%d rs1=%d imm=%0d funct3=%b",
|
||||
rdId, rs1Id, Iimm, funct3
|
||||
);
|
||||
isBranch: $display("BRANCH");
|
||||
isJAL: $display("JAL");
|
||||
isJALR: $display("JALR");
|
||||
isAUIPC: $display("AUIPC");
|
||||
isLUI: $display("LUI");
|
||||
isLoad: $display("LOAD");
|
||||
isStore: $display("STORE");
|
||||
isSYSTEM: $display("SYSTEM");
|
||||
endcase
|
||||
end
|
||||
`endif
|
||||
|
||||
// Gearbox and reset circuitry.
|
||||
Clockworks #(
|
||||
.SLOW(21) // Divide clock frequency by 2^21
|
||||
)CW(
|
||||
.CLK(clk),
|
||||
.RESET(rst_i),
|
||||
.clk(clk_i),
|
||||
.resetn(resetn)
|
||||
);
|
||||
|
||||
assign TXD = 1'b0; // not used for now
|
||||
endmodule
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
/* Register bank and state machine */
|
||||
`default_nettype none
|
||||
`include "clockworks.v"
|
||||
|
||||
module SOC (
   input        clk,    // system clock
   input        rst_i,  // reset button
   output [4:0] led,    // system LEDs: driven by the SOC, hence an output
                        // (was declared as an input, unlike every other step)
   input        RXD,    // UART receive
   output       TXD     // UART transmit
);
|
||||
|
||||
wire clk_i;
|
||||
wire resetn;
|
||||
|
||||
reg [31:0] MEM [0:31];
|
||||
reg [31:0] instr;
|
||||
reg [31:0] PC;
|
||||
|
||||
// Power-on state: program counter, current instruction and program memory.
// Instruction words are written in 4-bit groups; the field layout of an
// I-type word is: imm (12 bits), rs1 (5), funct3 (3), rd (5), opcode (7).
// NOTE(review): the program is stored in MEM[1]..MEM[5] while PC starts at 0,
// so MEM[0] holds X in simulation - confirm against the fetch logic.
initial begin
   PC = 0;

   // addi x0, x0, 0  (NOP), executed before the first fetch
   instr = 32'b0000_0000_0000_0000_0000_0000_0001_0011;

   // add x1, x0, x0
   MEM[1] = 32'b0000_0000_0000_0000_0000_0000_1011_0011;

   // addi x1, x1, 1
   MEM[2] = 32'b0000_0000_0001_0000_1000_0000_1001_0011;

   // addi x1, x1, 1
   MEM[3] = 32'b0000_0000_0001_0000_1000_0000_1001_0011;

   // addi x1, x1, 1
   MEM[4] = 32'b0000_0000_0001_0000_1000_0000_1001_0011;

   // ebreak
   MEM[5] = 32'b0000_0000_0001_0000_0000_0000_0111_0011;

end
|
||||
|
||||
wire isALUreg = (instr[6:0] == 7'b011_0011);
|
||||
wire isALUimm = (instr[6:0] == 7'b001_0011);
|
||||
wire isLUI = (instr[6:0] == 7'b011_0111);
|
||||
wire isAUIPC = (instr[6:0] == 7'b001_0111);
|
||||
wire isJAL = (instr[6:0] == 7'b110_1111);
|
||||
wire isJALR = (instr[6:0] == 7'b110_0111);
|
||||
wire isBRANCH = (instr[6:0] == 7'b110_0011);
|
||||
wire isLOAD = (instr[6:0] == 7'b000_0011);
|
||||
wire isSTORE = (instr[6:0] == 7'b010_0011);
|
||||
wire isSYSTEM = (instr[6:0] == 7'b111_0011);
|
||||
|
||||
Reference in New Issue
Block a user