diff options
-rw-r--r-- | plugins/dfsound/adsr.c | 125 | ||||
-rw-r--r-- | plugins/dfsound/arm_utils.S | 38 | ||||
-rw-r--r-- | plugins/dfsound/externals.h | 7 | ||||
-rw-r--r-- | plugins/dfsound/reverb.c | 23 | ||||
-rw-r--r-- | plugins/dfsound/spu.c | 438 | ||||
-rw-r--r-- | plugins/dfsound/spu_c64x.c | 157 | ||||
-rw-r--r-- | plugins/dfsound/spu_c64x.h | 7 | ||||
-rw-r--r-- | plugins/dfsound/spu_c64x_dspcode.c | 84 | ||||
-rw-r--r-- | plugins/dfsound/spu_config.h | 3 | ||||
-rw-r--r-- | plugins/dfsound/xa.c | 2 |
10 files changed, 561 insertions, 323 deletions
diff --git a/plugins/dfsound/adsr.c b/plugins/dfsound/adsr.c index 836fdbb..69e55bd 100644 --- a/plugins/dfsound/adsr.c +++ b/plugins/dfsound/adsr.c @@ -217,6 +217,131 @@ done: return ns;
}
+// Saturate a 64-bit intermediate result to the range of int, so that a
+// multi-sample envelope step can never wrap around.
+static int SkipADSR_clamp(long long v)
+{
+ if (v > 0x7fffffffLL)
+   return 0x7fffffff;
+ if (v < -0x7fffffffLL - 1)
+   return -0x7fffffff - 1;
+ return (int)v;
+}
+
+// Advance the ADSR envelope by up to ns_to samples without producing any
+// output samples.  Walks the same phases as the envelope state machine
+// (attack -> decay -> sustain, or release) and writes the resulting
+// adsr->State and adsr->EnvelopeVol back.
+//
+// Returns ns_to while the envelope is still running.  A smaller value
+// means the envelope ran out before ns_to samples were consumed; the
+// caller uses that to switch the channel off.
+//
+// The linear phases are applied as a single bulk step, so the result is an
+// approximation of stepping sample by sample (e.g. the exponential-mode
+// rate switch at 0x60000000 is only evaluated once).
+static int SkipADSR(ADSRInfoEx *adsr, int ns_to)
+{
+ int EnvelopeVol = adsr->EnvelopeVol;
+ int ns = 0, val, rto, level;
+ long long v64;
+
+ if (adsr->State == ADSR_RELEASE)
+ {
+   val = RateTableSub[adsr->ReleaseRate * 4];
+   if (adsr->ReleaseModeExp)
+   {
+     for (; ns < ns_to; ns++)
+     {
+       EnvelopeVol += ((long long)val * EnvelopeVol) >> (15+16);
+       if (EnvelopeVol <= 0)
+         break;
+     }
+   }
+   else
+   {
+     // linear release: apply all steps at once, in 64 bits so that
+     // val * ns_to cannot overflow
+     v64 = EnvelopeVol + (long long)val * ns_to;
+     EnvelopeVol = SkipADSR_clamp(v64);
+     if (v64 > 0)
+       ns = ns_to;
+   }
+   goto done;
+ }
+
+ switch (adsr->State)
+ {
+   case ADSR_ATTACK:                                   // -> attack
+     rto = 0;
+     if (adsr->AttackModeExp && EnvelopeVol >= 0x60000000)
+       rto = 8;
+     val = RateTableAdd[adsr->AttackRate + rto];
+
+     for (; ns < ns_to; ns++)
+     {
+       // detect passing the maximum without relying on signed wraparound
+       v64 = (long long)EnvelopeVol + val;
+       if (v64 < 0 || v64 > 0x7fffffffLL)
+       {
+         EnvelopeVol = 0x7fffffff;
+         adsr->State = ADSR_DECAY;
+         ns++;                                         // this sample is consumed
+         goto decay;
+       }
+       EnvelopeVol = (int)v64;
+     }
+     break;
+
+   //--------------------------------------------------//
+   decay:
+   case ADSR_DECAY:                                    // -> decay
+     val = RateTableSub[adsr->DecayRate * 4];
+     level = adsr->SustainLevel;
+
+     for (; ns < ns_to; )
+     {
+       EnvelopeVol += ((long long)val * EnvelopeVol) >> (15+16);
+       if (EnvelopeVol < 0)
+         EnvelopeVol = 0;
+
+       ns++;
+
+       // the top 4 bits of the volume are compared to the sustain level
+       if (((EnvelopeVol >> 27) & 0xf) <= level)
+       {
+         adsr->State = ADSR_SUSTAIN;
+         goto sustain;
+       }
+     }
+     break;
+
+   //--------------------------------------------------//
+   sustain:
+   case ADSR_SUSTAIN:                                  // -> sustain
+     if (adsr->SustainIncrease)
+     {
+       if (EnvelopeVol < 0x7fff0000)
+       {
+         rto = 0;
+         if (adsr->SustainModeExp && EnvelopeVol >= 0x60000000)
+           rto = 8;
+         val = RateTableAdd[adsr->SustainRate + rto];
+
+         // bulk step in 64 bits; anything at or above the threshold
+         // (or a negative result, as before) saturates to the maximum
+         v64 = EnvelopeVol + (long long)val * (ns_to - ns);
+         if (v64 < 0 || v64 >= 0x7fe00000)
+           EnvelopeVol = 0x7fffffff;
+         else
+           EnvelopeVol = (int)v64;
+       }
+
+       // a rising sustain can never end the channel, so all samples are
+       // consumed whether or not the volume saturated
+       ns = ns_to;
+     }
+     else
+     {
+       val = RateTableSub[adsr->SustainRate];
+       if (adsr->SustainModeExp)
+       {
+         for (; ns < ns_to; ns++)
+         {
+           EnvelopeVol += ((long long)val * EnvelopeVol) >> (15+16);
+           if (EnvelopeVol < 0)
+             break;
+         }
+       }
+       else
+       {
+         // linear decrease: one bulk step, computed in 64 bits
+         v64 = EnvelopeVol + (long long)val * (ns_to - ns);
+         EnvelopeVol = SkipADSR_clamp(v64);
+         if (v64 > 0)
+           ns = ns_to;
+       }
+     }
+     break;
+ }
+
+done:
+ adsr->EnvelopeVol = EnvelopeVol;
+ return ns;
+}
+
#endif
/*
diff --git a/plugins/dfsound/arm_utils.S b/plugins/dfsound/arm_utils.S index 2511bb3..9652313 100644 --- a/plugins/dfsound/arm_utils.S +++ b/plugins/dfsound/arm_utils.S @@ -14,7 +14,6 @@ .data .align 2 ptr_ChanBuf: .word ESYM(ChanBuf) -ptr_SSumLR: .word ESYM(SSumLR) #endif .text @@ -36,15 +35,11 @@ ptr_SSumLR: .word ESYM(SSumLR) #ifdef __ARM_NEON__ -FUNCTION(mix_chan): @ (int start, int count, int lv, int rv) +FUNCTION(mix_chan): @ (int *SSumLR, int count, int lv, int rv) vmov.32 d14[0], r2 vmov.32 d14[1], r3 @ multipliers - load_varadr r2, SSumLR - mov r12, r0 + mov r2, r0 load_varadr r0, ChanBuf - ldr r2, [r2] - add r0, r12, lsl #2 - add r2, r12, lsl #3 0: vldmia r0!, {d0-d1} vldmia r2, {d2-d5} @@ -70,17 +65,12 @@ mc_finish: bx lr -FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv, int *rvb) +FUNCTION(mix_chan_rvb): @ (int *SSumLR, int count, int lv, int rv, int *rvb) vmov.32 d14[0], r2 vmov.32 d14[1], r3 @ multipliers - load_varadr r2, SSumLR - mov r12, r0 + mov r2, r0 load_varadr r0, ChanBuf ldr r3, [sp] @ rvb - ldr r2, [r2] - add r0, r12, lsl #2 - add r2, r12, lsl #3 - add r3, r12, lsl #3 0: vldmia r0!, {d0-d1} vldmia r2, {d2-d5} @@ -114,16 +104,12 @@ mcr_finish: #elif defined(HAVE_ARMV5) -FUNCTION(mix_chan): @ (int start, int count, int lv, int rv) +FUNCTION(mix_chan): @ (int *SSumLR, int count, int lv, int rv) stmfd sp!, {r4-r8,lr} orr r3, r2, r3, lsl #16 lsl r3, #1 @ packed multipliers << 1 - load_varadr r2, SSumLR - mov r12, r0 + mov r2, r0 load_varadr r0, ChanBuf - ldr r2, [r2] - add r0, r12, lsl #2 - add r2, r12, lsl #3 0: ldmia r0!, {r4,r5} ldmia r2, {r6-r8,lr} @@ -144,17 +130,13 @@ mc_finish: ldmfd sp!, {r4-r8,pc} -FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv, int *rvb) +FUNCTION(mix_chan_rvb): @ (int *SSumLR, int count, int lv, int rv, int *rvb) stmfd sp!, {r4-r8,lr} orr lr, r2, r3, lsl #16 lsl lr, #1 - load_varadr r2, SSumLR - ldr r3, [sp] @ rvb - ldr r2, [r2] - load_varadr r4, ChanBuf - add r2, r2, r0, lsl #3 - add 
r3, r3, r0, lsl #3 - add r0, r4, r0, lsl #2 + mov r2, r0 + load_varadr r0, ChanBuf + ldr r3, [sp, #6*4] @ rvb 0: ldr r4, [r0], #4 ldmia r2, {r6,r7} diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index d3bcbc6..4832fac 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -206,8 +206,6 @@ typedef struct void (CALLBACK *cddavCallback)(unsigned short,unsigned short);
void (CALLBACK *scheduleCallback)(unsigned int);
- int * sRVBStart;
-
xa_decode_t * xapGlobal;
unsigned int * XAFeed;
unsigned int * XAPlay;
@@ -228,7 +226,10 @@ typedef struct SPUCHAN * s_chan;
int * SB;
- int pad[30];
+ int * RVB;
+ int * SSumLR;
+
+ int pad[29];
unsigned short regArea[0x400];
} SPUInfo;
diff --git a/plugins/dfsound/reverb.c b/plugins/dfsound/reverb.c index b7bcf12..7e32b8e 100644 --- a/plugins/dfsound/reverb.c +++ b/plugins/dfsound/reverb.c @@ -40,15 +40,6 @@ INLINE void StartREVERB(int ch) }
////////////////////////////////////////////////////////////////////////
-// HELPER FOR NEILL'S REVERB: re-inits our reverb mixing buf
-////////////////////////////////////////////////////////////////////////
-
-INLINE void InitREVERB(int ns_to)
-{
- memset(spu.sRVBStart,0,ns_to*sizeof(spu.sRVBStart[0])*2);
-}
-
-////////////////////////////////////////////////////////////////////////
INLINE int rvb2ram_offs(int curr, int space, int iOff)
{
@@ -73,7 +64,7 @@ INLINE int rvb2ram_offs(int curr, int space, int iOff) ////////////////////////////////////////////////////////////////////////
// portions based on spu2-x from PCSX2
-static void MixREVERB(int ns_to)
+static void MixREVERB(int *SSumLR, int *RVB, int ns_to)
{
int l_old = rvb.iRVBLeft;
int r_old = rvb.iRVBRight;
@@ -87,8 +78,8 @@ static void MixREVERB(int ns_to) int ACC0, ACC1, FB_A0, FB_A1, FB_B0, FB_B1;
int mix_dest_a0, mix_dest_a1, mix_dest_b0, mix_dest_b1;
- int input_L = spu.sRVBStart[ns] * rvb.IN_COEF_L;
- int input_R = spu.sRVBStart[ns+1] * rvb.IN_COEF_R;
+ int input_L = RVB[ns] * rvb.IN_COEF_L;
+ int input_R = RVB[ns+1] * rvb.IN_COEF_R;
int IIR_INPUT_A0 = ((g_buffer(IIR_SRC_A0) * rvb.IIR_COEF) + input_L) >> 15;
int IIR_INPUT_A1 = ((g_buffer(IIR_SRC_A1) * rvb.IIR_COEF) + input_R) >> 15;
@@ -158,7 +149,7 @@ static void MixREVERB(int ns_to) rvb.CurrAddr = curr_addr;
}
-static void MixREVERB_off(int ns_to)
+static void MixREVERB_off(int *SSumLR, int ns_to)
{
int l_old = rvb.iRVBLeft;
int r_old = rvb.iRVBRight;
@@ -236,7 +227,7 @@ static void prepare_offsets(void) rvb.dirty = 0;
}
-INLINE void REVERBDo(int ns_to)
+INLINE void REVERBDo(int *SSumLR, int *RVB, int ns_to)
{
if (!rvb.StartAddr) // reverb is off
{
@@ -249,14 +240,14 @@ INLINE void REVERBDo(int ns_to) if (unlikely(rvb.dirty))
prepare_offsets();
- MixREVERB(ns_to);
+ MixREVERB(SSumLR, RVB, ns_to);
}
else if (rvb.VolLeft || rvb.VolRight)
{
if (unlikely(rvb.dirty))
prepare_offsets();
- MixREVERB_off(ns_to);
+ MixREVERB_off(SSumLR, ns_to);
}
else // -> reverb off
{
diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index c6a06fd..f5edd3a 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -77,47 +77,8 @@ SPUConfig spu_config; REVERBInfo rvb; -#if defined(THREAD_ENABLED) || defined(WANT_THREAD_CODE) - -// worker thread state -static struct spu_worker { - unsigned int pending:1; - unsigned int exit_thread:1; - unsigned int stale_cache:1; - int ns_to; - int ctrl; - int decode_pos; - int silentch; - unsigned int chmask; - struct { - int spos; - int sbpos; - int sinc; - int start; - int loop; - int ns_to; - ADSRInfoEx adsr; - // might want to add vol and fmod flags.. - } ch[24]; - struct { - struct { - int adsrState; - int adsrEnvelopeVol; - } ch[24]; - unsigned int chan_end; - unsigned int decode_dirty; - } r; -} *worker; - -#else -static const void * const worker = NULL; -#endif - -// certain globals (were local before, but with the new timeproc I need em global) - static int iFMod[NSSIZE]; int ChanBuf[NSSIZE]; -int *SSumLR; #define CDDA_BUFFER_SIZE (16384 * sizeof(uint32_t)) // must be power of 2 @@ -267,17 +228,8 @@ static int check_irq(int ch, unsigned char *pos) // START SOUND... 
called by main thread to setup a new sound on a channel //////////////////////////////////////////////////////////////////////// -INLINE void StartSound(int ch) +static void StartSoundSB(int *SB) { - SPUCHAN *s_chan = &spu.s_chan[ch]; - int *SB = spu.SB + ch * SB_SIZE; - - StartADSR(ch); - StartREVERB(ch); - - s_chan->prevflags=2; - - s_chan->iSBPos=27; SB[26]=0; // init mixing vars SB[27]=0; @@ -285,6 +237,17 @@ INLINE void StartSound(int ch) SB[29]=0; // init our interpolation helpers SB[30]=0; SB[31]=0; +} + +static void StartSoundMain(int ch) +{ + SPUCHAN *s_chan = &spu.s_chan[ch]; + + StartADSR(ch); + StartREVERB(ch); + + s_chan->prevflags=2; + s_chan->iSBPos=27; s_chan->spos=0; spu.dwNewChannel&=~(1<<ch); // clear new channel bit @@ -292,6 +255,12 @@ INLINE void StartSound(int ch) spu.dwChannelDead&=~(1<<ch); } +static void StartSound(int ch) +{ + StartSoundMain(ch); + StartSoundSB(spu.SB + ch * SB_SIZE); +} + //////////////////////////////////////////////////////////////////////// // ALL KIND OF HELPERS //////////////////////////////////////////////////////////////////////// @@ -439,7 +408,7 @@ static void decode_block_data(int *dest, const unsigned char *src, int predict_n } } -static int decode_block(int ch, int *SB) +static int decode_block(void *unused, int ch, int *SB) { SPUCHAN *s_chan = &spu.s_chan[ch]; unsigned char *start; @@ -520,38 +489,6 @@ static int skip_block(int ch) return ret; } -#if defined(THREAD_ENABLED) || defined(WANT_THREAD_CODE) - -static int decode_block_work(int ch, int *SB) -{ - const unsigned char *ram = spu.spuMemC; - int predict_nr, shift_factor, flags; - int start = worker->ch[ch].start; - int loop = worker->ch[ch].loop; - - predict_nr = ram[start]; - shift_factor = predict_nr & 0xf; - predict_nr >>= 4; - - decode_block_data(SB, ram + start + 2, predict_nr, shift_factor); - - flags = ram[start + 1]; - if (flags & 4) - loop = start; // loop adress - - start += 16; - - if (flags & 1) // 1: stop/loop - start = loop; - - 
worker->ch[ch].start = start & 0x7ffff; - worker->ch[ch].loop = loop; - - return 0; -} - -#endif - // if irq is going to trigger sooner than in upd_samples, set upd_samples static void scan_for_irq(int ch, unsigned int *upd_samples) { @@ -594,8 +531,9 @@ static void scan_for_irq(int ch, unsigned int *upd_samples) } #define make_do_samples(name, fmod_code, interp_start, interp1_code, interp2_code, interp_end) \ -static noinline int do_samples_##name(int (*decode_f)(int ch, int *SB), int ch, \ - int ns_to, int *SB, int sinc, int *spos, int *sbpos) \ +static noinline int do_samples_##name( \ + int (*decode_f)(void *context, int ch, int *SB), void *ctx, \ + int ch, int ns_to, int *SB, int sinc, int *spos, int *sbpos) \ { \ int ns, d, fa; \ int ret = ns_to; \ @@ -612,7 +550,7 @@ static noinline int do_samples_##name(int (*decode_f)(int ch, int *SB), int ch, if (*sbpos >= 28) \ { \ *sbpos = 0; \ - d = decode_f(ch, SB); \ + d = decode_f(ctx, ch, SB); \ if (d && ns < ret) \ ret = ns; \ } \ @@ -723,13 +661,12 @@ static int do_samples_noise(int ch, int ns_to) #ifdef HAVE_ARMV5 // asm code; lv and rv must be 0-3fff -extern void mix_chan(int start, int count, int lv, int rv); -extern void mix_chan_rvb(int start, int count, int lv, int rv, int *rvb); +extern void mix_chan(int *SSumLR, int count, int lv, int rv); +extern void mix_chan_rvb(int *SSumLR, int count, int lv, int rv, int *rvb); #else -static void mix_chan(int start, int count, int lv, int rv) +static void mix_chan(int *SSumLR, int count, int lv, int rv) { - int *dst = SSumLR + start * 2; - const int *src = ChanBuf + start; + const int *src = ChanBuf; int l, r; while (count--) @@ -738,16 +675,16 @@ static void mix_chan(int start, int count, int lv, int rv) l = (sval * lv) >> 14; r = (sval * rv) >> 14; - *dst++ += l; - *dst++ += r; + *SSumLR++ += l; + *SSumLR++ += r; } } -static void mix_chan_rvb(int start, int count, int lv, int rv, int *rvb) +static void mix_chan_rvb(int *SSumLR, int count, int lv, int rv, int *rvb) { 
- int *dst = SSumLR + start * 2; - int *drvb = rvb + start * 2; - const int *src = ChanBuf + start; + const int *src = ChanBuf; + int *dst = SSumLR; + int *drvb = rvb; int l, r; while (count--) @@ -828,7 +765,13 @@ static void do_channels(int ns_to) int *SB, sinc; int ch, d; - InitREVERB(ns_to); + memset(spu.RVB, 0, ns_to * sizeof(spu.RVB[0]) * 2); + + mask = spu.dwNewChannel & 0xffffff; + for (ch = 0; mask != 0; ch++, mask >>= 1) { + if (mask & 1) + StartSound(ch); + } mask = spu.dwChannelOn & 0xffffff; for (ch = 0; mask != 0; ch++, mask >>= 1) // loop em all... @@ -843,13 +786,13 @@ static void do_channels(int ns_to) d = do_samples_noise(ch, ns_to); else if (s_chan->bFMod == 2 || (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 0)) - d = do_samples_noint(decode_block, ch, ns_to, + d = do_samples_noint(decode_block, NULL, ch, ns_to, SB, sinc, &s_chan->spos, &s_chan->iSBPos); else if (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 1) - d = do_samples_simple(decode_block, ch, ns_to, + d = do_samples_simple(decode_block, NULL, ch, ns_to, SB, sinc, &s_chan->spos, &s_chan->iSBPos); else - d = do_samples_default(decode_block, ch, ns_to, + d = do_samples_default(decode_block, NULL, ch, ns_to, SB, sinc, &s_chan->spos, &s_chan->iSBPos); d = MixADSR(&s_chan->ADSRX, d); @@ -868,150 +811,240 @@ static void do_channels(int ns_to) if (s_chan->bFMod == 2) // fmod freq channel memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0])); if (s_chan->bRVBActive) - mix_chan_rvb(0, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, spu.sRVBStart); + mix_chan_rvb(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, spu.RVB); else - mix_chan(0, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); + mix_chan(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); } } -static void do_samples_finish(int ns_to, int silentch, int decode_pos); +static void do_samples_finish(int *SSumLR, int *RVB, int ns_to, + int silentch, int decode_pos); // optional worker thread handling 
#if defined(THREAD_ENABLED) || defined(WANT_THREAD_CODE) +// worker thread state +static struct spu_worker { + union { + struct { + unsigned int exit_thread; + unsigned int i_ready; + unsigned int i_reaped; + unsigned int req_sent; // dsp + unsigned int last_boot_cnt; + }; + // aligning for C64X_DSP + unsigned int _pad0[128/4]; + }; + union { + struct { + unsigned int i_done; + unsigned int active; // dsp + unsigned int boot_cnt; + }; + unsigned int _pad1[128/4]; + }; + struct work_item { + int ns_to; + int ctrl; + int decode_pos; + unsigned int channels_new; + unsigned int channels_on; + unsigned int channels_silent; + struct { + int spos; + int sbpos; + int sinc; + int start; + int loop; + int ns_to; + ADSRInfoEx adsr; + // might want to add vol and fmod flags.. + } ch[24]; + int RVB[NSSIZE * 2]; + int SSumLR[NSSIZE * 2]; + } i[4]; +} *worker; + +#define WORK_MAXCNT (sizeof(worker->i) / sizeof(worker->i[0])) +#define WORK_I_MASK (WORK_MAXCNT - 1) + static void thread_work_start(void); -static void thread_work_wait_sync(void); -static void thread_sync_caches(void); +static void thread_work_wait_sync(struct work_item *work, int force); +static int thread_get_i_done(void); + +static int decode_block_work(void *context, int ch, int *SB) +{ + const unsigned char *ram = spu.spuMemC; + int predict_nr, shift_factor, flags; + struct work_item *work = context; + int start = work->ch[ch].start; + int loop = work->ch[ch].loop; + + predict_nr = ram[start]; + shift_factor = predict_nr & 0xf; + predict_nr >>= 4; + + decode_block_data(SB, ram + start + 2, predict_nr, shift_factor); + + flags = ram[start + 1]; + if (flags & 4) + loop = start; // loop adress + + start += 16; + + if (flags & 1) // 1: stop/loop + start = loop; + + work->ch[ch].start = start & 0x7ffff; + work->ch[ch].loop = loop; + + return 0; +} -static void queue_channel_work(int ns_to, int silentch) +static void queue_channel_work(int ns_to, unsigned int silentch) { - const SPUCHAN *s_chan; + struct work_item 
*work; + SPUCHAN *s_chan; unsigned int mask; - int ch; + int ch, d; - worker->ns_to = ns_to; - worker->ctrl = spu.spuCtrl; - worker->decode_pos = spu.decode_pos; - worker->silentch = silentch; + work = &worker->i[worker->i_ready & WORK_I_MASK]; + work->ns_to = ns_to; + work->ctrl = spu.spuCtrl; + work->decode_pos = spu.decode_pos; + work->channels_silent = silentch; + + mask = work->channels_new = spu.dwNewChannel & 0xffffff; + for (ch = 0; mask != 0; ch++, mask >>= 1) { + if (mask & 1) + StartSoundMain(ch); + } + + mask = work->channels_on = spu.dwChannelOn & 0xffffff; + spu.decode_dirty_ch |= mask & 0x0a; - mask = worker->chmask = spu.dwChannelOn & 0xffffff; for (ch = 0; mask != 0; ch++, mask >>= 1) { if (!(mask & 1)) continue; s_chan = &spu.s_chan[ch]; - worker->ch[ch].spos = s_chan->spos; - worker->ch[ch].sbpos = s_chan->iSBPos; - worker->ch[ch].sinc = s_chan->sinc; - worker->ch[ch].adsr = s_chan->ADSRX; - worker->ch[ch].start = s_chan->pCurr - spu.spuMemC; - worker->ch[ch].loop = s_chan->pLoop - spu.spuMemC; + work->ch[ch].spos = s_chan->spos; + work->ch[ch].sbpos = s_chan->iSBPos; + work->ch[ch].sinc = s_chan->sinc; + work->ch[ch].adsr = s_chan->ADSRX; + work->ch[ch].start = s_chan->pCurr - spu.spuMemC; + work->ch[ch].loop = s_chan->pLoop - spu.spuMemC; if (s_chan->prevflags & 1) - worker->ch[ch].start = worker->ch[ch].loop; + work->ch[ch].start = work->ch[ch].loop; - worker->ch[ch].ns_to = do_samples_skip(ch, ns_to); + d = do_samples_skip(ch, ns_to); + work->ch[ch].ns_to = d; + + // note: d is not accurate on skip + d = SkipADSR(&s_chan->ADSRX, d); + if (d < ns_to) { + spu.dwChannelOn &= ~(1 << ch); + s_chan->ADSRX.EnvelopeVol = 0; + } } - worker->pending = 1; + worker->i_ready++; thread_work_start(); } -static void do_channel_work(void) +static void do_channel_work(struct work_item *work) { - unsigned int mask, endmask = 0; + unsigned int mask; unsigned int decode_dirty_ch = 0; int *SB, sinc, spos, sbpos; int d, ch, ns_to; SPUCHAN *s_chan; - ns_to = 
worker->ns_to; - memset(spu.sRVBStart, 0, ns_to * sizeof(spu.sRVBStart[0]) * 2); + ns_to = work->ns_to; + memset(work->RVB, 0, ns_to * sizeof(work->RVB[0]) * 2); - mask = worker->chmask; + mask = work->channels_new; + for (ch = 0; mask != 0; ch++, mask >>= 1) { + if (mask & 1) + StartSoundSB(spu.SB + ch * SB_SIZE); + } + + mask = work->channels_on; for (ch = 0; mask != 0; ch++, mask >>= 1) { if (!(mask & 1)) continue; - d = worker->ch[ch].ns_to; - spos = worker->ch[ch].spos; - sbpos = worker->ch[ch].sbpos; - sinc = worker->ch[ch].sinc; + d = work->ch[ch].ns_to; + spos = work->ch[ch].spos; + sbpos = work->ch[ch].sbpos; + sinc = work->ch[ch].sinc; s_chan = &spu.s_chan[ch]; SB = spu.SB + ch * SB_SIZE; if (s_chan->bNoise) - do_lsfr_samples(d, worker->ctrl, &spu.dwNoiseCount, &spu.dwNoiseVal); + do_lsfr_samples(d, work->ctrl, &spu.dwNoiseCount, &spu.dwNoiseVal); else if (s_chan->bFMod == 2 || (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 0)) - do_samples_noint(decode_block_work, ch, d, SB, sinc, &spos, &sbpos); + do_samples_noint(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); else if (s_chan->bFMod == 0 && spu_config.iUseInterpolation == 1) - do_samples_simple(decode_block_work, ch, d, SB, sinc, &spos, &sbpos); + do_samples_simple(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); else - do_samples_default(decode_block_work, ch, d, SB, sinc, &spos, &sbpos); + do_samples_default(decode_block_work, work, ch, d, SB, sinc, &spos, &sbpos); - d = MixADSR(&worker->ch[ch].adsr, d); + d = MixADSR(&work->ch[ch].adsr, d); if (d < ns_to) { - endmask |= 1 << ch; - worker->ch[ch].adsr.EnvelopeVol = 0; + work->ch[ch].adsr.EnvelopeVol = 0; memset(&ChanBuf[d], 0, (ns_to - d) * sizeof(ChanBuf[0])); } - worker->r.ch[ch].adsrState = worker->ch[ch].adsr.State; - worker->r.ch[ch].adsrEnvelopeVol = worker->ch[ch].adsr.EnvelopeVol; if (ch == 1 || ch == 3) { - do_decode_bufs(spu.spuMem, ch/2, ns_to, worker->decode_pos); + do_decode_bufs(spu.spuMem, ch/2, ns_to, 
work->decode_pos); decode_dirty_ch |= 1 << ch; } if (s_chan->bFMod == 2) // fmod freq channel memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0])); if (s_chan->bRVBActive) - mix_chan_rvb(0, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, spu.sRVBStart); + mix_chan_rvb(work->SSumLR, ns_to, + s_chan->iLeftVolume, s_chan->iRightVolume, work->RVB); else - mix_chan(0, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); + mix_chan(work->SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); } - - worker->r.chan_end = endmask; - worker->r.decode_dirty = decode_dirty_ch; } -static void sync_worker_thread(int do_direct) +static void sync_worker_thread(int force) { - unsigned int mask; - int ch; + struct work_item *work; + int done, used_space; - if (do_direct) - thread_sync_caches(); - if (!worker->pending) - return; + done = thread_get_i_done() - worker->i_reaped; + used_space = worker->i_ready - worker->i_reaped; + //printf("done: %d use: %d dsp: %u/%u\n", done, used_space, + // worker->boot_cnt, worker->i_done); - thread_work_wait_sync(); - worker->pending = 0; + while ((force && used_space > 0) || used_space >= WORK_MAXCNT || done > 0) { + work = &worker->i[worker->i_reaped & WORK_I_MASK]; + thread_work_wait_sync(work, force); - mask = worker->chmask; - for (ch = 0; mask != 0; ch++, mask >>= 1) { - if (!(mask & 1)) continue; + do_samples_finish(work->SSumLR, work->RVB, work->ns_to, + work->channels_silent, work->decode_pos); - // be sure there was no keyoff while thread was working - if (spu.s_chan[ch].ADSRX.State != ADSR_RELEASE) - spu.s_chan[ch].ADSRX.State = worker->r.ch[ch].adsrState; - spu.s_chan[ch].ADSRX.EnvelopeVol = worker->r.ch[ch].adsrEnvelopeVol; + worker->i_reaped++; + done = thread_get_i_done() - worker->i_reaped; + used_space = worker->i_ready - worker->i_reaped; } - - spu.dwChannelOn &= ~worker->r.chan_end; - spu.decode_dirty_ch |= worker->r.decode_dirty; - - do_samples_finish(worker->ns_to, worker->silentch, - worker->decode_pos); } #else static void 
queue_channel_work(int ns_to, int silentch) {} -static void sync_worker_thread(int do_direct) {} +static void sync_worker_thread(int force) {} + +static const void * const worker = NULL; #endif // THREAD_ENABLED @@ -1022,10 +1055,9 @@ static void sync_worker_thread(int do_direct) {} void do_samples(unsigned int cycles_to, int do_direct) { - unsigned int mask; - int ch, ns_to; - int silentch; + unsigned int silentch; int cycle_diff; + int ns_to; cycle_diff = cycles_to - spu.cycles_played; if (cycle_diff < -2*1048576 || cycle_diff > 2*1048576) @@ -1035,7 +1067,9 @@ void do_samples(unsigned int cycles_to, int do_direct) return; } - do_direct |= (cycle_diff < 64 * 768); + silentch = ~(spu.dwChannelOn | spu.dwNewChannel) & 0xffffff; + + do_direct |= (silentch == 0xffffff); if (worker != NULL) sync_worker_thread(do_direct); @@ -1078,26 +1112,12 @@ void do_samples(unsigned int cycles_to, int do_direct) } } - mask = spu.dwNewChannel & 0xffffff; - for (ch = 0; mask != 0; ch++, mask >>= 1) { - if (mask & 1) - StartSound(ch); - } - - silentch = ~spu.dwChannelOn & 0xffffff; - - if (spu.dwChannelOn == 0) { - InitREVERB(ns_to); - do_samples_finish(ns_to, silentch, spu.decode_pos); + if (do_direct || worker == NULL || !spu_config.iUseThread) { + do_channels(ns_to); + do_samples_finish(spu.SSumLR, spu.RVB, ns_to, silentch, spu.decode_pos); } else { - if (do_direct || worker == NULL || !spu_config.iUseThread) { - do_channels(ns_to); - do_samples_finish(ns_to, silentch, spu.decode_pos); - } - else { - queue_channel_work(ns_to, silentch); - } + queue_channel_work(ns_to, silentch); } // advance "stopped" channels that can cause irqs @@ -1109,13 +1129,15 @@ void do_samples(unsigned int cycles_to, int do_direct) spu.decode_pos = (spu.decode_pos + ns_to) & 0x1ff; } -static void do_samples_finish(int ns_to, int silentch, int decode_pos) +static void do_samples_finish(int *SSumLR, int *RVB, int ns_to, + int silentch, int decode_pos) { int volmult = spu_config.iVolume; int ns; int d; - 
if(unlikely(silentch & spu.decode_dirty_ch & (1<<1))) // must clear silent channel decode buffers + // must clear silent channel decode buffers + if(unlikely(silentch & spu.decode_dirty_ch & (1<<1))) { memset(&spu.spuMem[0x800/2], 0, 0x400); spu.decode_dirty_ch &= ~(1<<1); @@ -1129,13 +1151,13 @@ static void do_samples_finish(int ns_to, int silentch, int decode_pos) //---------------------------------------------------// // mix XA infos (if any) - MixXA(ns_to, decode_pos); + MixXA(SSumLR, ns_to, decode_pos); /////////////////////////////////////////////////////// // mix all channels (including reverb) into one buffer if(spu_config.iUseReverb) - REVERBDo(ns_to); + REVERBDo(SSumLR, RVB, ns_to); if((spu.spuCtrl&0x4000)==0) // muted? (rare, don't optimize for this) { @@ -1254,7 +1276,6 @@ int CALLBACK SPUplayCDDAchannel(short *pcm, int nbytes) // to be called after state load void ClearWorkingState(void) { - memset(SSumLR, 0, NSSIZE * 2 * 4); // init some mixing buffers memset(iFMod, 0, sizeof(iFMod)); spu.pS=(short *)spu.pSpuBuffer; // setup soundbuffer pointer } @@ -1265,8 +1286,8 @@ void SetupStreams(void) int i; spu.pSpuBuffer = (unsigned char *)malloc(32768); // alloc mixing buffer - spu.sRVBStart = calloc(NSSIZE * 2, sizeof(spu.sRVBStart[0])); - SSumLR = calloc(NSSIZE * 2, sizeof(SSumLR[0])); + spu.RVB = calloc(NSSIZE * 2, sizeof(spu.RVB[0])); + spu.SSumLR = calloc(NSSIZE * 2, sizeof(spu.SSumLR[0])); spu.XAStart = // alloc xa buffer (uint32_t *)malloc(44100 * sizeof(uint32_t)); @@ -1298,10 +1319,10 @@ void RemoveStreams(void) { free(spu.pSpuBuffer); // free mixing buffer spu.pSpuBuffer = NULL; - free(spu.sRVBStart); // free reverb buffer - spu.sRVBStart = NULL; - free(SSumLR); - SSumLR = NULL; + free(spu.RVB); // free reverb buffer + spu.RVB = NULL; + free(spu.SSumLR); + spu.SSumLR = NULL; free(spu.XAStart); // free XA buffer spu.XAStart = NULL; free(spu.CDDAStart); // free CDDA buffer @@ -1332,23 +1353,28 @@ static void thread_work_start(void) 
sem_post(&t.sem_avail); } -static void thread_work_wait_sync(void) +static void thread_work_wait_sync(struct work_item *work, int force) { sem_wait(&t.sem_done); } -static void thread_sync_caches(void) +static int thread_get_i_done(void) { + return worker->i_done; } static void *spu_worker_thread(void *unused) { + struct work_item *work; + while (1) { sem_wait(&t.sem_avail); if (worker->exit_thread) break; - do_channel_work(); + work = &worker->i[worker->i_done & WORK_I_MASK]; + do_channel_work(work); + worker->i_done++; sem_post(&t.sem_done); } @@ -1377,6 +1403,7 @@ static void init_spu_thread(void) if (ret != 0) goto fail_thread; + spu_config.iThreadAvail = 1; return; fail_thread: @@ -1386,6 +1413,7 @@ fail_sem_done: fail_sem_avail: free(worker); worker = NULL; + spu_config.iThreadAvail = 0; } static void exit_spu_thread(void) diff --git a/plugins/dfsound/spu_c64x.c b/plugins/dfsound/spu_c64x.c index d829d29..200ab38 100644 --- a/plugins/dfsound/spu_c64x.c +++ b/plugins/dfsound/spu_c64x.c @@ -23,13 +23,11 @@ #include <dlfcn.h> #include <stddef.h> +#include <unistd.h> #include <inc_libc64_mini.h> #include "spu_c64x.h" -static dsp_mem_region_t region; -static dsp_component_id_t compid; - static struct { void *handle; int (*dsp_open)(void); @@ -42,49 +40,114 @@ static struct { int (*dsp_rpc_recv)(dsp_msg_t *_msgFrom); int (*dsp_rpc)(const dsp_msg_t *_msgTo, dsp_msg_t *_msgFrom); void (*dsp_logbuf_print)(void); + + dsp_mem_region_t region; + dsp_component_id_t compid; } f; static void thread_work_start(void) { + struct region_mem *mem; dsp_msg_t msg; int ret; - DSP_MSG_INIT(&msg, compid, CCMD_DOIT, 0, 0); + // make sure new work is written out + __sync_synchronize(); + + // this should be safe, as dsp checks for new work even + // after it decrements ->active + // cacheline: i_done, active + f.dsp_cache_inv_virt(&worker->i_done, 64); + if (worker->active == ACTIVE_CNT) + return; + + // to start the DSP, dsp_rpc_send() must be used, + // but before that, previous 
request must be finished + if (worker->req_sent) { + if (worker->boot_cnt == worker->last_boot_cnt) { + // hopefully still booting + //printf("booting?\n"); + return; + } + + ret = f.dsp_rpc_recv(&msg); + if (ret != 0) { + fprintf(stderr, "dsp_rpc_recv failed: %d\n", ret); + f.dsp_logbuf_print(); + worker->req_sent = 0; + spu_config.iUseThread = 0; + return; + } + } + + f.dsp_cache_inv_virt(&worker->i_done, 64); + worker->last_boot_cnt = worker->boot_cnt; + + mem = (void *)f.region.virt_addr; + memcpy(&mem->spu_config, &spu_config, sizeof(mem->spu_config)); + + DSP_MSG_INIT(&msg, f.compid, CCMD_DOIT, f.region.phys_addr, 0); ret = f.dsp_rpc_send(&msg); if (ret != 0) { fprintf(stderr, "dsp_rpc_send failed: %d\n", ret); f.dsp_logbuf_print(); - // maybe stop using the DSP? + spu_config.iUseThread = 0; + return; } + worker->req_sent = 1; } -static void thread_work_wait_sync(void) +static int thread_get_i_done(void) { - dsp_msg_t msg; + f.dsp_cache_inv_virt(&worker->i_done, sizeof(worker->i_done)); + return worker->i_done; +} + +static void thread_work_wait_sync(struct work_item *work, int force) +{ + int limit = 1000; int ns_to; - int ret; - ns_to = worker->ns_to; - f.dsp_cache_inv_virt(spu.sRVBStart, sizeof(spu.sRVBStart[0]) * 2 * ns_to); - f.dsp_cache_inv_virt(SSumLR, sizeof(SSumLR[0]) * 2 * ns_to); - f.dsp_cache_inv_virt(&worker->r, sizeof(worker->r)); - worker->stale_cache = 1; // SB, ram + ns_to = work->ns_to; + f.dsp_cache_inv_virt(work->RVB, sizeof(work->RVB[0]) * 2 * ns_to); + f.dsp_cache_inv_virt(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to); + __builtin_prefetch(work->RVB); + __builtin_prefetch(work->SSumLR); - ret = f.dsp_rpc_recv(&msg); - if (ret != 0) { - fprintf(stderr, "dsp_rpc_recv failed: %d\n", ret); - f.dsp_logbuf_print(); + while (worker->i_done == worker->i_reaped && limit-- > 0) { + if (!worker->active) { + printf("dsp: broken sync\n"); + worker->last_boot_cnt = ~0; + break; + } + + usleep(500); + f.dsp_cache_inv_virt(&worker->i_done, 64); } - 
//f.dsp_logbuf_print(); -} -// called before ARM decides to do SPU mixing itself -static void thread_sync_caches(void) -{ - if (worker->stale_cache) { + if (limit == 0) + printf("dsp: wait timeout\n"); + + // still in results loop? + if (worker->i_reaped != worker->i_done - 1) + return; + + if (worker->req_sent && (force || worker->i_done == worker->i_ready)) { + dsp_msg_t msg; + int ret; + + ret = f.dsp_rpc_recv(&msg); + if (ret != 0) { + fprintf(stderr, "dsp_rpc_recv failed: %d\n", ret); + f.dsp_logbuf_print(); + spu_config.iUseThread = 0; + } + worker->req_sent = 0; + } + + if (force) { f.dsp_cache_inv_virt(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24); f.dsp_cache_inv_virt(spu.spuMemC + 0x800, 0x800); - worker->stale_cache = 0; } } @@ -101,7 +164,7 @@ static void init_spu_thread(void) f.handle = dlopen(lib, RTLD_NOW); if (f.handle == NULL) { fprintf(stderr, "can't load %s: %s\n", lib, dlerror()); - return; + goto fail_open; } #define LDS(name) \ failed |= (f.name = dlsym(f.handle, #name)) == NULL @@ -120,32 +183,32 @@ static void init_spu_thread(void) fprintf(stderr, "missing symbol(s) in %s\n", lib); dlclose(f.handle); f.handle = NULL; - return; + goto fail_open; } } ret = f.dsp_open(); if (ret != 0) { fprintf(stderr, "dsp_open failed: %d\n", ret); - return; + goto fail_open; } - ret = f.dsp_component_load(NULL, COMPONENT_NAME, &compid); + ret = f.dsp_component_load(NULL, COMPONENT_NAME, &f.compid); if (ret != 0) { fprintf(stderr, "dsp_component_load failed: %d\n", ret); goto fail_cload; } - region = f.dsp_shm_alloc(DSP_CACHE_R, sizeof(*mem)); // writethrough - if (region.size < sizeof(*mem) || region.virt_addr == 0) { + f.region = f.dsp_shm_alloc(DSP_CACHE_R, sizeof(*mem)); // writethrough + if (f.region.size < sizeof(*mem) || f.region.virt_addr == 0) { fprintf(stderr, "dsp_shm_alloc failed\n"); goto fail_mem; } - mem = (void *)region.virt_addr; + mem = (void *)f.region.virt_addr; memcpy(&mem->spu_config, &spu_config, sizeof(mem->spu_config)); - 
DSP_MSG_INIT(&init_msg, compid, CCMD_INIT, region.phys_addr, 0); + DSP_MSG_INIT(&init_msg, f.compid, CCMD_INIT, f.region.phys_addr, 0); ret = f.dsp_rpc(&init_msg, &msg_in); if (ret != 0) { fprintf(stderr, "dsp_rpc failed: %d\n", ret); @@ -162,56 +225,56 @@ static void init_spu_thread(void) mem->offsetof_s_chan1, offsetof(typeof(*mem), s_chan[1])); goto fail_init; } - if (mem->offsetof_worker_ram != offsetof(typeof(*mem), worker.ch[1])) { + if (mem->offsetof_spos_3_20 != offsetof(typeof(*mem), worker.i[3].ch[20])) { fprintf(stderr, "error: size mismatch 3: %d vs %zd\n", - mem->offsetof_worker_ram, offsetof(typeof(*mem), worker.ch[1])); + mem->offsetof_spos_3_20, offsetof(typeof(*mem), worker.i[3].ch[20])); goto fail_init; } // override default allocations free(spu.spuMemC); spu.spuMemC = mem->spu_ram; - free(spu.sRVBStart); - spu.sRVBStart = mem->RVB; - free(SSumLR); - SSumLR = mem->SSumLR; free(spu.SB); spu.SB = mem->SB; free(spu.s_chan); spu.s_chan = mem->s_chan; worker = &mem->worker; - printf("spu: C64x DSP ready (id=%d).\n", (int)compid); + printf("spu: C64x DSP ready (id=%d).\n", (int)f.compid); f.dsp_logbuf_print(); -pcnt_init(); + spu_config.iThreadAvail = 1; (void)do_channel_work; // used by DSP instead return; fail_init: - f.dsp_shm_free(region); + f.dsp_shm_free(f.region); fail_mem: // no component unload func? 
fail_cload: - printf("spu: C64x DSP init failed.\n"); f.dsp_logbuf_print(); f.dsp_close(); +fail_open: + printf("spu: C64x DSP init failed.\n"); + spu_config.iUseThread = spu_config.iThreadAvail = 0; worker = NULL; } static void exit_spu_thread(void) { + dsp_msg_t msg; + if (worker == NULL) return; - if (worker->pending) - thread_work_wait_sync(); - f.dsp_shm_free(region); + if (worker->req_sent) + f.dsp_rpc_recv(&msg); + + f.dsp_logbuf_print(); + f.dsp_shm_free(f.region); f.dsp_close(); spu.spuMemC = NULL; - spu.sRVBStart = NULL; - SSumLR = NULL; spu.SB = NULL; spu.s_chan = NULL; worker = NULL; diff --git a/plugins/dfsound/spu_c64x.h b/plugins/dfsound/spu_c64x.h index d4e73e9..bb20cc3 100644 --- a/plugins/dfsound/spu_c64x.h +++ b/plugins/dfsound/spu_c64x.h @@ -7,12 +7,10 @@ enum { struct region_mem { unsigned char spu_ram[512 * 1024]; - int RVB[NSSIZE * 2]; - int SSumLR[NSSIZE * 2]; int SB[SB_SIZE * 24]; // careful not to lose ARM writes by DSP overwriting // with old data when it's writing out neighbor cachelines - int _pad1[128/4 - ((NSSIZE * 4 + SB_SIZE * 24) & (128/4 - 1))]; + int _pad1[128/4 - ((SB_SIZE * 24) & (128/4 - 1))]; SPUCHAN s_chan[24 + 1]; int _pad2[128/4 - ((sizeof(SPUCHAN) * 25 / 4) & (128/4 - 1))]; struct spu_worker worker; @@ -20,6 +18,7 @@ struct region_mem { // init/debug int sizeof_region_mem; int offsetof_s_chan1; - int offsetof_worker_ram; + int offsetof_spos_3_20; }; +#define ACTIVE_CNT 3 diff --git a/plugins/dfsound/spu_c64x_dspcode.c b/plugins/dfsound/spu_c64x_dspcode.c index 117a296..97d3028 100644 --- a/plugins/dfsound/spu_c64x_dspcode.c +++ b/plugins/dfsound/spu_c64x_dspcode.c @@ -30,38 +30,80 @@ /* dummy deps, some bloat but avoids ifdef hell in SPU code.. 
*/ static void thread_work_start(void) {} -static void thread_work_wait_sync(void) {} -static void thread_sync_caches(void) {} +static void thread_work_wait_sync(struct work_item *work, int force) {} +static int thread_get_i_done(void) { return 0; } struct out_driver *out_current; void SetupSound(void) {} -#if 0 -// no use, c64_tools does BCACHE_wbInvAll.. -static void sync_caches(void) + +static void invalidate_cache(struct work_item *work) +{ + syscalls.cache_inv(work, offsetof(typeof(*work), RVB), 1); + syscalls.cache_inv(spu.s_chan, sizeof(spu.s_chan[0]) * 24, 0); + syscalls.cache_inv(work->SSumLR, + sizeof(work->SSumLR[0]) * 2 * work->ns_to, 0); +} + +static void writeout_cache(struct work_item *work) { - int ns_to = worker->ns_to; + int ns_to = work->ns_to; + + syscalls.cache_wb(work->RVB, sizeof(work->RVB[0]) * 2 * ns_to, 1); + syscalls.cache_wb(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to, 1); +} - syscalls.cache_wb(spu.sRVBStart, sizeof(spu.sRVBStart[0]) * 2 * ns_to, 1); - syscalls.cache_wb(SSumLR, sizeof(SSumLR[0]) * 2 * ns_to, 1); +static void do_processing(void) +{ + struct work_item *work; + int left, dirty = 0; - syscalls.cache_wbInv(worker, sizeof(*worker), 1); + while (worker->active) + { + // i_ready is in first cacheline + syscalls.cache_inv(worker, 64, 1); + + left = worker->i_ready - worker->i_done; + if (left > 0) { + dirty = 1; + worker->active = ACTIVE_CNT; + syscalls.cache_wb(&worker->active, 4, 1); + + work = &worker->i[worker->i_done & WORK_I_MASK]; + invalidate_cache(work); + do_channel_work(work); + writeout_cache(work); + + worker->i_done++; + syscalls.cache_wb(&worker->i_done, 4, 1); + continue; + } + + // nothing to do? 
Write out non-critical caches + if (dirty) { + syscalls.cache_wb(spu.spuMemC + 0x800, 0x800, 1); + syscalls.cache_wb(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24, 1); + dirty = 0; + continue; + } + + // this ->active loop thing is to avoid a race where we miss + // new work and clear ->active just after ARM checks it + worker->active--; + syscalls.cache_wb(&worker->active, 4, 1); + } } -#endif static unsigned int exec(dsp_component_cmd_t cmd, unsigned int arg1, unsigned int arg2, unsigned int *ret1, unsigned int *ret2) { struct region_mem *mem = (void *)arg1; - int i; switch (cmd) { case CCMD_INIT: InitADSR(); spu.spuMemC = mem->spu_ram; - spu.sRVBStart = mem->RVB; - SSumLR = mem->SSumLR; spu.SB = mem->SB; spu.s_chan = mem->s_chan; worker = &mem->worker; @@ -69,18 +111,22 @@ static unsigned int exec(dsp_component_cmd_t cmd, mem->sizeof_region_mem = sizeof(*mem); mem->offsetof_s_chan1 = offsetof(typeof(*mem), s_chan[1]); - mem->offsetof_worker_ram = offsetof(typeof(*mem), worker.ch[1]); + mem->offsetof_spos_3_20 = offsetof(typeof(*mem), worker.i[3].ch[20]); // seems to be unneeded, no write-alloc? but just in case.. syscalls.cache_wb(&mem->sizeof_region_mem, 3 * 4, 1); break; case CCMD_DOIT: - do_channel_work(); + worker->active = ACTIVE_CNT; + worker->boot_cnt++; + syscalls.cache_wb(&worker->i_done, 64, 1); + memcpy(&spu_config, &mem->spu_config, sizeof(spu_config)); + + do_processing(); + // c64_tools lib does BCACHE_wbInvAll() when it receives mailbox irq, - // so there is no benefit of syncing only what's needed. - // But call wbInvAll() anyway in case c64_tools is ever fixed.. - //sync_caches(); - syscalls.cache_wbInvAll(); + // but invalidate anyway in case c64_tools is ever fixed.. 
+ syscalls.cache_inv(mem, sizeof(mem->spu_ram) + sizeof(mem->SB), 0); break; default: diff --git a/plugins/dfsound/spu_config.h b/plugins/dfsound/spu_config.h index 9a139e1..fce1cda 100644 --- a/plugins/dfsound/spu_config.h +++ b/plugins/dfsound/spu_config.h @@ -8,6 +8,9 @@ typedef struct int iUseInterpolation; int iTempo; int iUseThread; + + // status + int iThreadAvail; } SPUConfig; extern SPUConfig spu_config; diff --git a/plugins/dfsound/xa.c b/plugins/dfsound/xa.c index 6c0ce4b..ad7e824 100644 --- a/plugins/dfsound/xa.c +++ b/plugins/dfsound/xa.c @@ -38,7 +38,7 @@ static int gauss_window[8] = {0, 0, 0, 0, 0, 0, 0, 0}; // MIX XA & CDDA //////////////////////////////////////////////////////////////////////// -INLINE void MixXA(int ns_to, int decode_pos) +INLINE void MixXA(int *SSumLR, int ns_to, int decode_pos) { int cursor = decode_pos; int ns; |