Skip to content

Commit 2990196

Browse files
committed
Upgrade to 2.10.0-hide-3.1.0
1 parent aacde26 commit 2990196

40 files changed

+6937
-244
lines changed

CMakeLists.txt

+5
Original file line numberDiff line numberDiff line change
@@ -508,18 +508,23 @@ if(CMAKE_C_COMPILER_ID MATCHES "MSVC")
508508
# asm optimized monero v8 code
509509
enable_language(ASM_MASM)
510510
set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm" PROPERTY ASM_MASM)
511+
set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.asm" PROPERTY ASM_MASM)
511512
add_library(xmr-stak-asm
512513
STATIC
513514
"xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.asm"
515+
"xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.asm"
514516
)
515517
else()
516518
# asm optimized monero v8 code
517519
enable_language(ASM)
518520
set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S" PROPERTY CPP)
521+
set_property(SOURCE "xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S" PROPERTY CPP)
519522
set_source_files_properties("xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S" PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
523+
set_source_files_properties("xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S" PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
520524
add_library(xmr-stak-asm
521525
STATIC
522526
"xmrstak/backend/cpu/crypto/asm/cryptonight_v8_main_loop.S"
527+
"xmrstak/backend/cpu/crypto/asm/cnR/CryptonightR_template.S"
523528
)
524529
endif()
525530

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ If your prefered coin is not listed, you can choose one of the following algorit
7979
- cryptonight_v7_stellite
8080
- cryptonight_v8
8181
- cryptonight_v8_half (used by masari and stellite)
82+
- cryptonight_v8_reversewaltz (used by graft)
8283
- cryptonight_v8_zelerius
8384
- 4MiB scratchpad memory
8485
- cryptonight_haven

xmrstak/backend/amd/OclCryptonightR_gen.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ cl_program CryptonightR_get_program(GpuContext* ctx, xmrstak_algo algo, uint64_t
282282
code_size = v4_random_math_init<cryptonight_r>(code, height);
283283
break;
284284
default:
285-
printer::inst()->print_msg(LDEBUG, "CryptonightR_get_program: invalid algo %d", algo);
285+
printer::inst()->print_msg(L0, "CryptonightR_get_program: invalid algo %d", algo);
286286
return nullptr;
287287
}
288288

xmrstak/backend/amd/amd_gpu/gpu.cpp

+14-22
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ size_t InitOpenCLGpu(cl_context opencl_ctx, GpuContext* ctx, const char* source_
294294
* this is required if the dev pool is mining monero
295295
* but the user tuned their settings for another currency
296296
*/
297-
if(miner_algo == cryptonight_monero_v8)
297+
if(miner_algo == cryptonight_monero_v8 || miner_algo == cryptonight_v8_reversewaltz)
298298
{
299299
if(ctx->memChunk < 2)
300300
mem_chunk_exp = 1u << 2;
@@ -774,7 +774,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
774774
// Same as the platform index sanity check, except we must check all requested device indexes
775775
for(int i = 0; i < num_gpus; ++i)
776776
{
777-
if(entries <= ctx[i].deviceIdx)
777+
if(ctx[i].deviceIdx >= entries)
778778
{
779779
printer::inst()->print_msg(L1,"Selected OpenCL device index %lu doesn't exist.\n", ctx[i].deviceIdx);
780780
return ERR_STUPID_PARAMS;
@@ -793,17 +793,22 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
793793
}
794794

795795
// Indexes sanity checked above
796-
#ifdef __GNUC__
797-
cl_device_id TempDeviceList[num_gpus];
798-
#else
799-
cl_device_id* TempDeviceList = (cl_device_id*)_alloca(entries * sizeof(cl_device_id));
800-
#endif
796+
std::vector<cl_device_id> TempDeviceList(num_gpus, nullptr);
797+
798+
printer::inst()->print_msg(LDEBUG, "Number of OpenCL GPUs %d", entries);
801799
for(int i = 0; i < num_gpus; ++i)
802800
{
803801
ctx[i].DeviceID = DeviceIDList[ctx[i].deviceIdx];
804802
TempDeviceList[i] = DeviceIDList[ctx[i].deviceIdx];
805803
}
806804

805+
cl_context opencl_ctx = clCreateContext(NULL, num_gpus, TempDeviceList.data(), NULL, NULL, &ret);
806+
if(ret != CL_SUCCESS)
807+
{
808+
printer::inst()->print_msg(L1,"Error %s when calling clCreateContext.", err_to_str(ret));
809+
return ERR_OCL_API;
810+
}
811+
807812
const char *fastIntMathV2CL =
808813
#include "./opencl/fast_int_math_v2.cl"
809814
;
@@ -847,22 +852,9 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
847852

848853
std::vector<std::shared_ptr<InterleaveData>> interleaveData(num_gpus, nullptr);
849854

850-
std::vector<cl_context> context_vec(entries, nullptr);
851-
for(int i = 0; i < num_gpus; ++i)
852-
{
853-
if(context_vec[ctx[i].deviceIdx] == nullptr)
854-
{
855-
context_vec[ctx[i].deviceIdx] = clCreateContext(NULL, 1, &(ctx[i].DeviceID), NULL, NULL, &ret);
856-
if(ret != CL_SUCCESS)
857-
{
858-
printer::inst()->print_msg(L1,"Error %s when calling clCreateContext.", err_to_str(ret));
859-
return ERR_OCL_API;
860-
}
861-
}
862-
}
863-
864855
for(int i = 0; i < num_gpus; ++i)
865856
{
857+
printer::inst()->print_msg(LDEBUG,"OpenCL Init device %d", ctx[i].deviceIdx);
866858
const size_t devIdx = ctx[i].deviceIdx;
867859
if(interleaveData.size() <= devIdx)
868860
{
@@ -879,7 +871,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
879871
ctx[i].interleaveData = interleaveData[devIdx];
880872
ctx[i].interleaveData->adjustThreshold = static_cast<double>(ctx[i].interleave)/100.0;
881873
ctx[i].interleaveData->startAdjustThreshold = ctx[i].interleaveData->adjustThreshold;
882-
ctx[i].opencl_ctx = context_vec[ctx[i].deviceIdx];
874+
ctx[i].opencl_ctx = opencl_ctx;
883875

884876
if((ret = InitOpenCLGpu(ctx->opencl_ctx, &ctx[i], source_code.c_str())) != ERR_SUCCESS)
885877
{

xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl

+24-9
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ R"===(
3030
#define cryptonight_superfast 12
3131
#define cryptonight_gpu 13
3232
#define cryptonight_conceal 14
33+
#define cryptonight_v8_reversewaltz 17
3334

3435
/* For Mesa clover support */
3536
#ifdef cl_clang_storage_class_specifiers
@@ -639,7 +640,7 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad,
639640
R"===(
640641

641642
// __NV_CL_C_VERSION checks if NVIDIA opencl is used
642-
#if(ALGO == cryptonight_monero_v8 && defined(__NV_CL_C_VERSION))
643+
#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && defined(__NV_CL_C_VERSION))
643644
# define SCRATCHPAD_CHUNK(N) (*(__local uint4*)((__local uchar*)(scratchpad_line) + (idxS ^ (N << 4))))
644645
# define SCRATCHPAD_CHUNK_GLOBAL (*((__global uint16*)(Scratchpad + (IDX((idx0 & 0x1FFFC0U) >> 4)))))
645646
#else
@@ -659,7 +660,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
659660
float4 conc_var = (float4)(0.0f);
660661
#endif
661662

662-
#if(ALGO == cryptonight_monero_v8)
663+
#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
663664
ulong b[4];
664665
uint4 b_x[2];
665666
// NVIDIA
@@ -673,7 +674,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
673674
#endif
674675
__local uint AES0[256], AES1[256];
675676

676-
#if(ALGO == cryptonight_monero_v8)
677+
#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
677678
# if defined(__clang__) && !defined(__NV_CL_C_VERSION)
678679
__local uint RCP[256];
679680
# endif
@@ -689,7 +690,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
689690
AES0[i] = tmp;
690691
AES1[i] = rotate(tmp, 8U);
691692

692-
#if(ALGO == cryptonight_monero_v8 && (defined(__clang__) && !defined(__NV_CL_C_VERSION)))
693+
#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && (defined(__clang__) && !defined(__NV_CL_C_VERSION)))
693694
RCP[i] = RCP_C[i];
694695
#endif
695696
}
@@ -723,7 +724,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
723724

724725
b_x[0] = ((uint4 *)b)[0];
725726

726-
#if(ALGO == cryptonight_monero_v8)
727+
#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
727728
a[1] = states[1] ^ states[5];
728729
b[2] = states[8] ^ states[10];
729730
b[3] = states[9] ^ states[11];
@@ -755,7 +756,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
755756
{
756757
ulong c[2];
757758

758-
#if(ALGO == cryptonight_monero_v8 && defined(__NV_CL_C_VERSION))
759+
#if((ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz) && defined(__NV_CL_C_VERSION))
759760
uint idxS = idx0 & 0x30U;
760761
*scratchpad_line = SCRATCHPAD_CHUNK_GLOBAL;
761762
#endif
@@ -792,6 +793,15 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
792793
SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]);
793794
SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);
794795
}
796+
#elif(ALGO == cryptonight_v8_reversewaltz)
797+
{
798+
ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(1));
799+
ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2));
800+
ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(3));
801+
SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + ((ulong2 *)(b_x + 1))[0]);
802+
SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]);
803+
SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);
804+
}
795805
#endif
796806

797807
#if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari || ALGO == cryptonight_bittube2)
@@ -807,7 +817,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
807817
SCRATCHPAD_CHUNK(0) = b_x[0];
808818
idx0 = as_uint2(c[0]).s0 & MASK;
809819

810-
#elif(ALGO == cryptonight_monero_v8)
820+
#elif(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
811821
SCRATCHPAD_CHUNK(0) = b_x[0] ^ ((uint4 *)c)[0];
812822
# ifdef __NV_CL_C_VERSION
813823
// flush shuffled data
@@ -826,7 +836,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
826836
uint4 tmp;
827837
tmp = SCRATCHPAD_CHUNK(0);
828838

829-
#if(ALGO == cryptonight_monero_v8)
839+
#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
830840
// Use division and square root results from the _previous_ iteration to hide the latency
831841
tmp.s0 ^= division_result.s0;
832842
tmp.s1 ^= division_result.s1 ^ sqrt_result;
@@ -853,8 +863,13 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
853863
ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2));
854864
result_mul ^= chunk2;
855865
ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(3));
866+
#if(ALGO == cryptonight_v8_reversewaltz)
867+
SCRATCHPAD_CHUNK(1) = as_uint4(chunk1 + ((ulong2 *)(b_x + 1))[0]);
868+
SCRATCHPAD_CHUNK(2) = as_uint4(chunk3 + ((ulong2 *)b_x)[0]);
869+
#else
856870
SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + ((ulong2 *)(b_x + 1))[0]);
857871
SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + ((ulong2 *)b_x)[0]);
872+
#endif
858873
SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);
859874
a[0] += result_mul.s0;
860875
a[1] += result_mul.s1;
@@ -882,7 +897,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states
882897

883898
((uint4 *)a)[0] ^= tmp;
884899

885-
#if (ALGO == cryptonight_monero_v8)
900+
#if (ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
886901
# if defined(__NV_CL_C_VERSION)
887902
// flush shuffled data
888903
SCRATCHPAD_CHUNK_GLOBAL = *scratchpad_line;

xmrstak/backend/amd/amd_gpu/opencl/fast_int_math_v2.cl

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ R"===(
33
* @author SChernykh
44
*/
55

6-
#if(ALGO == cryptonight_monero_v8)
6+
#if(ALGO == cryptonight_monero_v8 || ALGO == cryptonight_v8_reversewaltz)
77

88
static const __constant uint RCP_C[256] =
99
{

xmrstak/backend/amd/config.tpl

+5-5
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,26 @@ R"===(// generated by XMRSTAK_VERSION
66
* intensity - Number of parallel GPU threads (nothing to do with CPU threads)
77
* worksize - Number of local GPU threads (nothing to do with CPU threads)
88
* affine_to_cpu - This will affine the thread to a CPU. This can make a GPU miner play along nicer with a CPU miner.
9-
* strided_index - switch memory pattern used for the scratch pad memory
9+
* strided_index - switch memory pattern used for the scratchpad memory
1010
* 3 = chunked memory, chunk size based on the 'worksize'
1111
* required: intensity must be a multiple of worksize
1212
* 2 = chunked memory, chunk size is controlled by 'mem_chunk'
1313
* required: intensity must be a multiple of worksize
14-
* 1 or true = use 16byte contiguous memory per thread, the next memory block has offset of intensity blocks
14+
* 1 or true = use 16 byte contiguous memory per thread, the next memory block has offset of intensity blocks
1515
* (for cryptonight_v8 and monero it is equal to strided_index = 0)
1616
* 0 or false = use a contiguous block of memory per thread
1717
* mem_chunk - range 0 to 18: set the number of elements (16 byte) per chunk
1818
* this value is only used if 'strided_index' == 2
19-
* element count is computed with the equation: 2 to the power of 'mem_chunk' e.g. 4 means a chunk of 16 elements(256byte)
19+
* element count is computed with the equation: 2 to the power of 'mem_chunk' e.g. 4 means a chunk of 16 elements(256 byte)
2020
* unroll - controls how often the POW main loop is unrolled; valid range [1;128) - for most OpenCL implementations it must be a power of two.
2121
* comp_mode - Compatibility enable/disable the automatic guard around compute kernel which allows
22-
* to use a intensity which is not the multiple of the worksize.
22+
* to use an intensity which is not a multiple of the worksize.
2323
* If you set it to false and the intensity is not a multiple of the worksize, the miner can crash:
2424
* in this case set the intensity to a multiple of the worksize or activate comp_mode.
2525
* interleave - Controls the starting point in time between two threads on the same GPU device relative to the last started thread.
2626
* This option only has an effect if two compute threads are using the same GPU device: valid range [0;100]
2727
* 0 = disable thread interleaving
28-
* 40 = each working thread waits until 40% of the hash calculation of the previous started thread is finished
28+
* 40 = each working thread waits until 40% of the hash calculation of the previously started thread is finished
2929
* "gpu_threads_conf" :
3030
* [
3131
* { "index" : 0, "intensity" : 1000, "worksize" : 8, "affine_to_cpu" : false,

xmrstak/backend/amd/minethd.cpp

+4-5
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,7 @@ void minethd::work_main()
186186

187187
cpu::minethd::cn_on_new_job set_job;
188188

189-
cn_hash_fun hash_fun;
190-
cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
189+
cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
191190

192191
uint8_t version = 0;
193192
size_t lastPoolId = 0;
@@ -228,12 +227,12 @@ void minethd::work_main()
228227
if(new_version >= coinDesc.GetMiningForkVersion())
229228
{
230229
miner_algo = coinDesc.GetMiningAlgo();
231-
cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
230+
cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
232231
}
233232
else
234233
{
235234
miner_algo = coinDesc.GetMiningAlgoRoot();
236-
cpu::minethd::func_multi_selector<1>(hash_fun, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
235+
cpu::minethd::func_multi_selector<1>(&cpu_ctx, set_job, ::jconf::inst()->HaveHardwareAes(), true /*bNoPrefetch*/, miner_algo);
237236
}
238237
lastPoolId = oWork.iPoolId;
239238
version = new_version;
@@ -282,7 +281,7 @@ void minethd::work_main()
282281

283282
*(uint32_t*)(bWorkBlob + 39) = results[i];
284283

285-
hash_fun(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx, miner_algo);
284+
cpu_ctx->hash_fn(bWorkBlob, oWork.iWorkSize, bResult, &cpu_ctx, miner_algo);
286285
if ( (*((uint64_t*)(bResult + 24))) < oWork.iTarget)
287286
executor::inst()->push_event(ex_event(job_result(oWork.sJobID, results[i], bResult, iThreadNo, miner_algo), oWork.iPoolId));
288287
else

0 commit comments

Comments
 (0)