2 Commits 0f7299aa10 ... bf18a9548b

Author SHA1 Message Date
  chenzubin bf18a9548b 浮点数DCT取整 3 months ago
  chenzubin d5d3f0b705 解GOP图像序列 3 months ago
3 changed files with 853 additions and 0 deletions
  1. 6 0
      .idea/vcs.xml
  2. 152 0
      app/src/main/cpp/SKP_Silk_LPC_inv_pred_gain.c
  3. 695 0
      app/src/main/cpp/SKP_Silk_NSQ_del_dec.c

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
1
+<?xml version="1.0" encoding="UTF-8"?>
2
+<project version="4">
3
+  <component name="VcsDirectoryMappings">
4
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+  </component>
6
+</project>

+ 152 - 0
app/src/main/cpp/SKP_Silk_LPC_inv_pred_gain.c

@@ -0,0 +1,152 @@
1
+#include "SKP_Silk_SigProc_FIX.h"
2
+#define QA          16
3
+#define A_LIMIT     65520
4
+/* Computes the inverse prediction gain for Q12 prediction coefficients.
+ * The coefficients are copied into a QA-domain work buffer and processed from
+ * index order - 1 down to 0: each step rejects the filter (returns 1) when the
+ * current coefficient exceeds A_LIMIT in magnitude, scales *invGain_Q30 by
+ * rc_mult1_Q30 and, for every index above 0, rewrites the lower-index
+ * coefficients into the other work buffer.
+ * When 1 is returned, *invGain_Q30 holds the partial product computed so far.
+ * NOTE(review): order is not range-checked; the work buffers hold
+ * SigProc_MAX_ORDER_LPC entries and Anew_QA[ 0 ] is read even if order is 0 --
+ * presumably callers guarantee 1 <= order <= SigProc_MAX_ORDER_LPC; confirm.
+ * NOTE(review): SKP_Silk_LPC_inverse_pred_gain_Q13 repeats this body with a
+ * different input shift only. */
5
+SKP_int SKP_Silk_LPC_inverse_pred_gain(       /* O:   Returns 1 if unstable, otherwise 0          */
6
+    SKP_int32           *invGain_Q30,           /* O:   Inverse prediction gain, Q30 energy domain  */
7
+    const SKP_int16     *A_Q12,                 /* I:   Prediction coefficients, Q12 [order]        */
8
+    const SKP_int       order                   /* I:   Prediction order                            */
9
+)
10
+{
11
+    SKP_int   k, n, headrm;
12
+    SKP_int32 rc_Q31, rc_mult1_Q30, rc_mult2_Q16;
13
+    SKP_int32 Atmp_QA[ 2 ][ SigProc_MAX_ORDER_LPC ], tmp_QA;
14
+    SKP_int32 *Aold_QA, *Anew_QA;
15
+    /* Two work rows are used alternately; the starting row is picked by the parity of order */
16
+    Anew_QA = Atmp_QA[ order & 1 ];
17
+    /* Raise the AR coefficients from Q12 to the working QA domain (left shift by QA - 12) */
18
+    for( k = 0; k < order; k++ ) {
19
+        Anew_QA[ k ] = SKP_LSHIFT( (SKP_int32)A_Q12[ k ], QA - 12 );
20
+    }
21
+
22
+    *invGain_Q30 = ( 1 << 30 ); /* initial value before any scaling */
23
+    for( k = order - 1; k > 0; k-- ) {
24
+        /* Check for stability: reject when the magnitude exceeds A_LIMIT */
25
+        if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) {
26
+            return 1;
27
+        }
28
+
29
+        /* Set RC equal to negated AR coef, moved from QA to Q31 */
30
+        rc_Q31 = -SKP_LSHIFT( Anew_QA[ k ], 31 - QA );
31
+        
32
+        /* rc_mult1_Q30 range: [ 1 : 2^30-1 ] */
33
+        rc_mult1_Q30 = ( SKP_int32_MAX >> 1 ) - SKP_SMMUL( rc_Q31, rc_Q31 );
34
+        SKP_assert( rc_mult1_Q30 > ( 1 << 15 ) );                   /* reduce A_LIMIT if fails */
35
+        SKP_assert( rc_mult1_Q30 < ( 1 << 30 ) );
36
+
37
+        /* rc_mult2_Q16 range: [ 2^16 : SKP_int32_MAX ] */
38
+        rc_mult2_Q16 = SKP_INVERSE32_varQ( rc_mult1_Q30, 46 );      /* 16 = 46 - 30 */
39
+
40
+        /* Update inverse gain */
41
+        /* invGain_Q30 range: [ 0 : 2^30 ] */
42
+        *invGain_Q30 = SKP_LSHIFT( SKP_SMMUL( *invGain_Q30, rc_mult1_Q30 ), 2 );
43
+        SKP_assert( *invGain_Q30 >= 0           );
44
+        SKP_assert( *invGain_Q30 <= ( 1 << 30 ) );
45
+
46
+        /* Swap pointers: the row just used becomes the source, the row selected by the parity of k the destination */
47
+        Aold_QA = Anew_QA;
48
+        Anew_QA = Atmp_QA[ k & 1 ];
49
+        
50
+        /* Update AR coefficients 0 .. k-1; rc_mult2_Q16 is first shifted up by its headroom (leading zeros minus one) */
51
+        headrm = SKP_Silk_CLZ32( rc_mult2_Q16 ) - 1;
52
+        rc_mult2_Q16 = SKP_LSHIFT( rc_mult2_Q16, headrm );          /* Q: 16 + headrm */
53
+        for( n = 0; n < k; n++ ) {
54
+            tmp_QA = Aold_QA[ n ] - SKP_LSHIFT( SKP_SMMUL( Aold_QA[ k - n - 1 ], rc_Q31 ), 1 );
55
+            Anew_QA[ n ] = SKP_LSHIFT( SKP_SMMUL( tmp_QA, rc_mult2_Q16 ), 16 - headrm );
56
+        }
57
+    }
58
+
59
+    /* Check for stability of the last remaining coefficient (index 0) */
60
+    if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) {
61
+        return 1;
62
+    }
63
+
64
+    /* Set RC equal to negated AR coef */
65
+    rc_Q31 = -SKP_LSHIFT( Anew_QA[ 0 ], 31 - QA );
66
+
67
+    /* Range: [ 1 : 2^30 ] */
68
+    rc_mult1_Q30 = ( SKP_int32_MAX >> 1 ) - SKP_SMMUL( rc_Q31, rc_Q31 );
69
+
70
+    /* Update inverse gain (no coefficient update is needed after the final step) */
71
+    /* Range: [ 0 : 2^30 ] */
72
+    *invGain_Q30 = SKP_LSHIFT( SKP_SMMUL( *invGain_Q30, rc_mult1_Q30 ), 2 );
73
+    SKP_assert( *invGain_Q30 >= 0     );
74
+    SKP_assert( *invGain_Q30 <= 1<<30 );
75
+
76
+    return 0;
77
+}
78
+
79
+/* For input in Q13 domain.
+ * Computes the inverse prediction gain for Q13 prediction coefficients and
+ * returns 1 as soon as a coefficient exceeds A_LIMIT in magnitude, 0 otherwise.
+ * The body matches SKP_Silk_LPC_inverse_pred_gain except that the input is
+ * shifted by QA - 13 instead of QA - 12.
+ * When 1 is returned, *invGain_Q30 holds the partial product computed so far.
+ * NOTE(review): order is not range-checked; the work buffers hold
+ * SigProc_MAX_ORDER_LPC entries and Anew_QA[ 0 ] is read even if order is 0 --
+ * presumably callers guarantee 1 <= order <= SigProc_MAX_ORDER_LPC; confirm. */
80
+SKP_int SKP_Silk_LPC_inverse_pred_gain_Q13(   /* O:   Returns 1 if unstable, otherwise 0          */
81
+    SKP_int32           *invGain_Q30,           /* O:   Inverse prediction gain, Q30 energy domain  */
82
+    const SKP_int16     *A_Q13,                 /* I:   Prediction coefficients, Q13 [order]        */
83
+    const SKP_int       order                   /* I:   Prediction order                            */
84
+)
85
+{
86
+    SKP_int   k, n, headrm;
87
+    SKP_int32 rc_Q31, rc_mult1_Q30, rc_mult2_Q16;
88
+    SKP_int32 Atmp_QA[ 2 ][ SigProc_MAX_ORDER_LPC ], tmp_QA;
89
+    SKP_int32 *Aold_QA, *Anew_QA;
90
+    /* Two work rows are used alternately; the starting row is picked by the parity of order */
91
+    Anew_QA = Atmp_QA[ order & 1 ];
92
+    /* Raise the AR coefficients from Q13 to the working QA domain (left shift by QA - 13) */
93
+    for( k = 0; k < order; k++ ) {
94
+        Anew_QA[ k ] = SKP_LSHIFT( (SKP_int32)A_Q13[ k ], QA - 13 );
95
+    }
96
+
97
+    *invGain_Q30 = ( 1 << 30 ); /* initial value before any scaling */
98
+    for( k = order - 1; k > 0; k-- ) {
99
+        /* Check for stability: reject when the magnitude exceeds A_LIMIT */
100
+        if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) {
101
+            return 1;
102
+        }
103
+
104
+        /* Set RC equal to negated AR coef, moved from QA to Q31 */
105
+        rc_Q31 = -SKP_LSHIFT( Anew_QA[ k ], 31 - QA );
106
+        
107
+        /* rc_mult1_Q30 range: [ 1 : 2^30-1 ] */
108
+        rc_mult1_Q30 = ( SKP_int32_MAX >> 1 ) - SKP_SMMUL( rc_Q31, rc_Q31 );
109
+        SKP_assert( rc_mult1_Q30 > ( 1 << 15 ) );                   /* reduce A_LIMIT if fails */
110
+        SKP_assert( rc_mult1_Q30 < ( 1 << 30 ) );
111
+
112
+        /* rc_mult2_Q16 range: [ 2^16 : SKP_int32_MAX ] */
113
+        rc_mult2_Q16 = SKP_INVERSE32_varQ( rc_mult1_Q30, 46 );      /* 16 = 46 - 30 */
114
+
115
+        /* Update inverse gain */
116
+        /* invGain_Q30 range: [ 0 : 2^30 ] */
117
+        *invGain_Q30 = SKP_LSHIFT( SKP_SMMUL( *invGain_Q30, rc_mult1_Q30 ), 2 );
118
+        SKP_assert( *invGain_Q30 >= 0     );
119
+        SKP_assert( *invGain_Q30 <= 1<<30 );
120
+
121
+        /* Swap pointers: the row just used becomes the source, the row selected by the parity of k the destination */
122
+        Aold_QA = Anew_QA;
123
+        Anew_QA = Atmp_QA[ k & 1 ];
124
+        
125
+        /* Update AR coefficients 0 .. k-1; rc_mult2_Q16 is first shifted up by its headroom (leading zeros minus one) */
126
+        headrm = SKP_Silk_CLZ32( rc_mult2_Q16 ) - 1;
127
+        rc_mult2_Q16 = SKP_LSHIFT( rc_mult2_Q16, headrm );          /* Q: 16 + headrm */
128
+        for( n = 0; n < k; n++ ) {
129
+            tmp_QA = Aold_QA[ n ] - SKP_LSHIFT( SKP_SMMUL( Aold_QA[ k - n - 1 ], rc_Q31 ), 1 );
130
+            Anew_QA[ n ] = SKP_LSHIFT( SKP_SMMUL( tmp_QA, rc_mult2_Q16 ), 16 - headrm );
131
+        }
132
+    }
133
+
134
+    /* Check for stability of the last remaining coefficient (index 0) */
135
+    if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) {
136
+        return 1;
137
+    }
138
+
139
+    /* Set RC equal to negated AR coef */
140
+    rc_Q31 = -SKP_LSHIFT( Anew_QA[ 0 ], 31 - QA );
141
+
142
+    /* Range: [ 1 : 2^30 ] */
143
+    rc_mult1_Q30 = ( SKP_int32_MAX >> 1 ) - SKP_SMMUL( rc_Q31, rc_Q31 );
144
+
145
+    /* Update inverse gain (no coefficient update is needed after the final step) */
146
+    /* Range: [ 0 : 2^30 ] */
147
+    *invGain_Q30 = SKP_LSHIFT( SKP_SMMUL( *invGain_Q30, rc_mult1_Q30 ), 2 );
148
+    SKP_assert( *invGain_Q30 >= 0     );
149
+    SKP_assert( *invGain_Q30 <= 1<<30 );
150
+
151
+    return 0;
152
+}

+ 695 - 0
app/src/main/cpp/SKP_Silk_NSQ_del_dec.c

@@ -0,0 +1,695 @@
1
+#include "SKP_Silk_main.h"
2
+
3
+typedef struct { /* One candidate state of the delayed-decision quantizer; SKP_Silk_NSQ_del_dec keeps an array of these.
+                  * The DECISION_DELAY-sized arrays are read there through an index wrapped with DECISION_DELAY_MASK. */
4
+    SKP_int   RandState[ DECISION_DELAY ];
5
+    SKP_int32 Q_Q10[     DECISION_DELAY ]; /* shifted right by 10 to produce the q[] output */
6
+    SKP_int32 Xq_Q10[    DECISION_DELAY ]; /* multiplied by Gain_Q16, rounded and saturated to 16 bits for the xq output */
7
+    SKP_int32 Pred_Q16[  DECISION_DELAY ]; /* copied into sLTP_Q16 when the final winner is flushed */
8
+    SKP_int32 Shape_Q10[ DECISION_DELAY ]; /* copied into NSQ->sLTP_shp_Q10; element 0 is seeded from the last sample of that buffer */
9
+    SKP_int32 Gain_Q16[  DECISION_DELAY ]; /* per-sample gain paired with Xq_Q10 */
10
+    SKP_int32 sLPC_Q14[ MAX_FRAME_LENGTH / NB_SUBFR + NSQ_LPC_BUF_LENGTH ]; /* first NSQ_LPC_BUF_LENGTH entries are loaded from NSQ->sLPC_Q14 */
11
+    SKP_int32 LF_AR_Q12; /* loaded from, and for the winner stored back to, NSQ->sLF_AR_shp_Q12 */
12
+    SKP_int32 Seed;      /* initialised to ( state index + psEncCtrlC->Seed ) & 3 */
13
+    SKP_int32 SeedInit;  /* copy of the initial Seed; the winner's value is written back to psEncCtrlC->Seed */
14
+    SKP_int32 RD_Q10;    /* cost used to pick the winner: the state with the smallest value wins */
15
+} NSQ_del_dec_struct;
16
+
17
+typedef struct { /* Per-sample record; the quantizer declares psSampleState[ DEL_DEC_STATES_MAX ][ 2 ],
+                  * i.e. two of these per delayed-decision state.
+                  * NOTE(review): how the fields are filled is not visible in this part of the file;
+                  * presumably they mirror the same-named fields of NSQ_del_dec_struct -- confirm. */
18
+    SKP_int32 Q_Q10;
19
+    SKP_int32 RD_Q10;
20
+    SKP_int32 xq_Q14;
21
+    SKP_int32 LF_AR_Q12;
22
+    SKP_int32 sLTP_shp_Q10;
23
+    SKP_int32 LPC_exc_Q16;
24
+} NSQ_sample_struct;
25
+
26
+SKP_INLINE void SKP_Silk_copy_del_dec_state( /* Forward declaration; neither the definition nor a call is visible in this part of the file */
27
+        NSQ_del_dec_struct  *DD_dst,                /* I    Dst del dec state (NOTE(review): marked I although it is the destination -- presumably O; confirm) */
28
+        NSQ_del_dec_struct  *DD_src,                /* I    Src del dec state                   */
29
+        SKP_int             LPC_state_idx           /* I    Index to LPC buffer                 */
30
+);
31
+
32
+SKP_INLINE void SKP_Silk_nsq_del_dec_scale_states( /* Forward declaration; called once per subframe by SKP_Silk_NSQ_del_dec, before the quantizer */
33
+        SKP_Silk_nsq_state  *NSQ,                   /* I/O  NSQ state                           */
34
+        NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
35
+        const SKP_int16     x[],                    /* I    Input in Q0                         */
36
+        SKP_int32           x_sc_Q10[],             /* O    Input scaled with 1/Gain in Q10     */
37
+        SKP_int             length,                 /* I    Length of input                     */
38
+        SKP_int16           sLTP[],                 /* I    Re-whitened LTP state in Q0         */
39
+        SKP_int32           sLTP_Q16[],             /* O    LTP state matching scaled input     */
40
+        SKP_int             subfr,                  /* I    Subframe number                     */
41
+        SKP_int             nStatesDelayedDecision, /* I    Number of del dec states            */
42
+        SKP_int             smpl_buf_idx,           /* I    Index to newest samples in buffers  */
43
+        const SKP_int       LTP_scale_Q14,          /* I    LTP state scaling                   */
44
+        const SKP_int32     Gains_Q16[ NB_SUBFR ],  /* I    Gains for all subframes (the caller passes its whole Gains_Q16 array) */
45
+        const SKP_int       pitchL[ NB_SUBFR ]      /* I    Pitch lag                           */
46
+);
47
+
48
+/******************************************/
49
+/* Noise shape quantizer for one subframe */
50
+/******************************************/
51
+SKP_INLINE void SKP_Silk_noise_shape_quantizer_del_dec( /* Forward declaration; the definition follows SKP_Silk_NSQ_del_dec */
52
+        SKP_Silk_nsq_state  *NSQ,                   /* I/O  NSQ state                           */
53
+        NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
54
+        SKP_int             sigtype,                /* I    Signal type                         */
55
+        const SKP_int32     x_Q10[],                /* I    Input; the caller passes the x_sc_Q10 buffer */
56
+        SKP_int             q[],                    /* O                                        */
57
+        SKP_int16           xq[],                   /* O                                        */
58
+        SKP_int32           sLTP_Q16[],             /* I/O  LTP filter state                    */
59
+        const SKP_int16     a_Q12[],                /* I    Short term prediction coefs         */
60
+        const SKP_int16     b_Q14[],                /* I    Long term prediction coefs          */
61
+        const SKP_int16     AR_shp_Q13[],           /* I    Noise shaping coefs                 */
62
+        SKP_int             lag,                    /* I    Pitch lag                           */
63
+        SKP_int32           HarmShapeFIRPacked_Q14, /* I    Two values packed by the caller: gain >> 2 in the low half, gain >> 1 shifted up by 16 */
64
+        SKP_int             Tilt_Q14,               /* I    Spectral tilt                       */
65
+        SKP_int32           LF_shp_Q14,             /* I                                        */
66
+        SKP_int32           Gain_Q16,               /* I                                        */
67
+        SKP_int             Lambda_Q10,             /* I                                        */
68
+        SKP_int             offset_Q10,             /* I                                        */
69
+        SKP_int             length,                 /* I    Input length                        */
70
+        SKP_int             subfr,                  /* I    Subframe number                     */
71
+        SKP_int             shapingLPCOrder,        /* I    Shaping LPC filter order            */
72
+        SKP_int             predictLPCOrder,        /* I    Prediction LPC filter order         */
73
+        SKP_int             nStatesDelayedDecision, /* I    Number of states in decision tree   */
74
+        SKP_int             *smpl_buf_idx,          /* I    Index to newest samples in buffers (NOTE(review): passed by address and the caller's comment says "oldest" -- presumably I/O; confirm) */
75
+        SKP_int             decisionDelay           /* I                                        */
76
+);
77
+/* Quantizes one frame with the delayed-decision noise shaping quantizer.
+ * psEncC->nStatesDelayedDecision candidate states are initialised from the NSQ
+ * state; then, for each of the NB_SUBFR subframes,
+ * SKP_Silk_nsq_del_dec_scale_states and SKP_Silk_noise_shape_quantizer_del_dec
+ * are called. After the last subframe the state with the smallest RD_Q10 is
+ * selected, its last decisionDelay samples are written to q[] and NSQ->xq,
+ * and the xq / sLTP_shp_Q10 buffers are moved down by one frame.
+ * For voiced frames the LTP state is re-whitened with the current A
+ * coefficients at selected subframes (see the loop body). */
78
+void SKP_Silk_NSQ_del_dec(
79
+        SKP_Silk_encoder_state          *psEncC,                                    /* I/O  Encoder State                       */
80
+        SKP_Silk_encoder_control        *psEncCtrlC,                                /* I    Encoder Control                     */
81
+        SKP_Silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                           */
82
+        const SKP_int16                 x[],                                        /* I    Prefiltered input signal            */
83
+        SKP_int                         q[],                                        /* O    Quantized pulse signal              */
84
+        const SKP_int                   LSFInterpFactor_Q2,                         /* I    LSF interpolation factor in Q2      */
85
+        const SKP_int16                 PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Prediction coefs                    */
86
+        const SKP_int16                 LTPCoef_Q14[ LTP_ORDER * NB_SUBFR ],        /* I    LT prediction coefs                 */
87
+        const SKP_int16                 AR2_Q13[ NB_SUBFR * SHAPE_LPC_ORDER_MAX ],  /* I                                        */
88
+        const SKP_int                   HarmShapeGain_Q14[ NB_SUBFR ],              /* I                                        */
89
+        const SKP_int                   Tilt_Q14[ NB_SUBFR ],                       /* I    Spectral tilt                       */
90
+        const SKP_int32                 LF_shp_Q14[ NB_SUBFR ],                     /* I                                        */
91
+        const SKP_int32                 Gains_Q16[ NB_SUBFR ],                      /* I                                        */
92
+        const SKP_int                   Lambda_Q10,                                 /* I                                        */
93
+        const SKP_int                   LTP_scale_Q14                               /* I    LTP state scaling                   */
94
+)
95
+{
96
+    SKP_int     i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
97
+    SKP_int     last_smple_idx, smpl_buf_idx, decisionDelay, subfr_length;
98
+    const SKP_int16 *A_Q12, *B_Q14, *AR_shp_Q13;
99
+    SKP_int16   *pxq;
100
+    SKP_int32   sLTP_Q16[ 2 * MAX_FRAME_LENGTH ];
101
+    SKP_int16   sLTP[     2 * MAX_FRAME_LENGTH ];
102
+    SKP_int32   HarmShapeFIRPacked_Q14;
103
+    SKP_int     offset_Q10;
104
+    SKP_int32   FiltState[ MAX_LPC_ORDER ], RDmin_Q10;
105
+    SKP_int32   x_sc_Q10[ MAX_FRAME_LENGTH / NB_SUBFR ];
106
+    NSQ_del_dec_struct psDelDec[ DEL_DEC_STATES_MAX ];
107
+    NSQ_del_dec_struct *psDD;
108
+
109
+    subfr_length = psEncC->frame_length / NB_SUBFR;
110
+
111
+    /* Set unvoiced lag to the previous one, overwrite later for voiced */
112
+    lag = NSQ->lagPrev;
113
+
114
+    SKP_assert( NSQ->prev_inv_gain_Q16 != 0 );
115
+
116
+    /* Initialize delayed decision states: zero them, then seed each one from the NSQ state */
117
+    SKP_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) );
118
+    for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) {
119
+        psDD                 = &psDelDec[ k ];
120
+        psDD->Seed           = ( k + psEncCtrlC->Seed ) & 3;
121
+        psDD->SeedInit       = psDD->Seed;
122
+        psDD->RD_Q10         = 0;
123
+        psDD->LF_AR_Q12      = NSQ->sLF_AR_shp_Q12;
124
+        psDD->Shape_Q10[ 0 ] = NSQ->sLTP_shp_Q10[ psEncC->frame_length - 1 ];
125
+        SKP_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( SKP_int32 ) );
126
+    }
127
+
128
+    offset_Q10   = SKP_Silk_Quantization_Offsets_Q10[ psEncCtrlC->sigtype ][ psEncCtrlC->QuantOffsetType ];
129
+    smpl_buf_idx = 0; /* index of oldest samples */
130
+
131
+    decisionDelay = SKP_min_int( DECISION_DELAY, subfr_length );
132
+    /* For voiced frames limit the decision delay to lower than the pitch lag */
133
+    if( psEncCtrlC->sigtype == SIG_TYPE_VOICED ) {
134
+        for( k = 0; k < NB_SUBFR; k++ ) {
135
+            decisionDelay = SKP_min_int( decisionDelay, psEncCtrlC->pitchL[ k ] - LTP_ORDER / 2 - 1 );
136
+        }
137
+    }
138
+    /* The flag is cleared only when the interpolation factor equals 1 << 2 */
139
+    if( LSFInterpFactor_Q2 == ( 1 << 2 ) ) {
140
+        LSF_interpolation_flag = 0;
141
+    } else {
142
+        LSF_interpolation_flag = 1;
143
+    }
144
+
145
+    /* Setup pointers to start of sub frame */
146
+    pxq                   = &NSQ->xq[ psEncC->frame_length ];
147
+    NSQ->sLTP_shp_buf_idx = psEncC->frame_length;
148
+    NSQ->sLTP_buf_idx     = psEncC->frame_length;
149
+    subfr = 0;
150
+    for( k = 0; k < NB_SUBFR; k++ ) {
151
+        A_Q12      = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; /* flag set: coefficient set k >> 1; flag clear: the index is OR-ed with 1 */
152
+        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER           ];
153
+        AR_shp_Q13 = &AR2_Q13[     k * SHAPE_LPC_ORDER_MAX ];
154
+
155
+        NSQ->rewhite_flag = 0;
156
+        if( psEncCtrlC->sigtype == SIG_TYPE_VOICED ) {
157
+            /* Voiced */
158
+            lag = psEncCtrlC->pitchL[ k ];
159
+
160
+            /* Re-whitening: the mask is 1 when the interpolation flag is set (even k) and 3 otherwise */
161
+            if( ( k & ( 3 - SKP_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
162
+                if( k == 2 ) {
163
+                    /* RESET DELAYED DECISIONS */
164
+                    /* Find winner: the state with the smallest RD_Q10 */
165
+                    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
166
+                    Winner_ind = 0;
167
+                    for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) {
168
+                        if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) {
169
+                            RDmin_Q10 = psDelDec[ i ].RD_Q10;
170
+                            Winner_ind = i;
171
+                        }
172
+                    }
173
+                    for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) {
174
+                        if( i != Winner_ind ) {
175
+                            psDelDec[ i ].RD_Q10 += ( SKP_int32_MAX >> 4 ); /* add a large cost to every non-winning state */
176
+                            SKP_assert( psDelDec[ i ].RD_Q10 >= 0 );
177
+                        }
178
+                    }
179
+
180
+                    /* Copy final part of signals from winner state to output and long-term filter states */
181
+                    psDD = &psDelDec[ Winner_ind ];
182
+                    last_smple_idx = smpl_buf_idx + decisionDelay;
183
+                    for( i = 0; i < decisionDelay; i++ ) {
184
+                        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
185
+                        q[   i - decisionDelay ] = ( SKP_int )SKP_RSHIFT( psDD->Q_Q10[ last_smple_idx ], 10 );
186
+                        pxq[ i - decisionDelay ] = ( SKP_int16 )SKP_SAT16( SKP_RSHIFT_ROUND(
187
+                                SKP_SMULWW( psDD->Xq_Q10[ last_smple_idx ],
188
+                                            psDD->Gain_Q16[ last_smple_idx ] ), 10 ) );
189
+                        NSQ->sLTP_shp_Q10[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q10[ last_smple_idx ];
190
+                    }
191
+
192
+                    subfr = 0; /* restart the subframe counter that is passed to the quantizer */
193
+                }
194
+
195
+                /* Rewhiten with new A coefs */
196
+                start_idx = psEncC->frame_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
197
+                start_idx = SKP_LIMIT( start_idx, 0, psEncC->frame_length - psEncC->predictLPCOrder );
198
+
199
+                SKP_memset( FiltState, 0, psEncC->predictLPCOrder * sizeof( SKP_int32 ) );
200
+                SKP_Silk_MA_Prediction( &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
201
+                                        A_Q12, FiltState, sLTP + start_idx, psEncC->frame_length - start_idx, psEncC->predictLPCOrder );
202
+
203
+                NSQ->sLTP_buf_idx = psEncC->frame_length;
204
+                NSQ->rewhite_flag = 1;
205
+            }
206
+        }
207
+
208
+        /* Noise shape parameters: gain >> 2 goes in the low half-word, gain >> 1 is shifted up by 16 */
209
+        SKP_assert( HarmShapeGain_Q14[ k ] >= 0 );
210
+        HarmShapeFIRPacked_Q14  =                        SKP_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
211
+        HarmShapeFIRPacked_Q14 |= SKP_LSHIFT( ( SKP_int32 )SKP_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
212
+
213
+        SKP_Silk_nsq_del_dec_scale_states( NSQ, psDelDec, x, x_sc_Q10,
214
+                                           subfr_length, sLTP, sLTP_Q16, k, psEncC->nStatesDelayedDecision, smpl_buf_idx,
215
+                                           LTP_scale_Q14, Gains_Q16, psEncCtrlC->pitchL );
216
+
217
+        SKP_Silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psEncCtrlC->sigtype, x_sc_Q10, q, pxq, sLTP_Q16,
218
+                                                A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ],
219
+                                                Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, psEncC->predictLPCOrder,
220
+                                                psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay
221
+        );
222
+        /* Advance the input and both output pointers to the next subframe */
223
+        x   += psEncC->subfr_length;
224
+        q   += psEncC->subfr_length;
225
+        pxq += psEncC->subfr_length;
226
+    }
227
+
228
+    /* Find winner: the state with the smallest RD_Q10 */
229
+    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
230
+    Winner_ind = 0;
231
+    for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) {
232
+        if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) {
233
+            RDmin_Q10 = psDelDec[ k ].RD_Q10;
234
+            Winner_ind = k;
235
+        }
236
+    }
237
+
238
+    /* Copy final part of signals from winner state to output and long-term filter states */
239
+    psDD = &psDelDec[ Winner_ind ];
240
+    psEncCtrlC->Seed = psDD->SeedInit;
241
+    last_smple_idx = smpl_buf_idx + decisionDelay;
242
+    for( i = 0; i < decisionDelay; i++ ) {
243
+        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
244
+        q[   i - decisionDelay ] = ( SKP_int )SKP_RSHIFT( psDD->Q_Q10[ last_smple_idx ], 10 );
245
+        pxq[ i - decisionDelay ] = ( SKP_int16 )SKP_SAT16( SKP_RSHIFT_ROUND(
246
+                SKP_SMULWW( psDD->Xq_Q10[ last_smple_idx ], psDD->Gain_Q16[ last_smple_idx ] ), 10 ) );
247
+        NSQ->sLTP_shp_Q10[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q10[ last_smple_idx ];
248
+        sLTP_Q16[          NSQ->sLTP_buf_idx     - decisionDelay + i ] = psDD->Pred_Q16[  last_smple_idx ];
249
+        /* NOTE(review): sLTP_Q16 is a local array that is not read again before return, so the store above looks unobservable -- confirm */
250
+    }
251
+    SKP_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( SKP_int32 ) );
252
+
253
+    /* Update states.
+     * NOTE(review): the prev_inv_gain_Q16 line assigns the field to itself and therefore changes nothing;
+     * presumably the field is maintained elsewhere (e.g. by the scale-states helper) -- confirm before removing. */
254
+    NSQ->sLF_AR_shp_Q12    = psDD->LF_AR_Q12;
255
+    NSQ->prev_inv_gain_Q16 = NSQ->prev_inv_gain_Q16;
256
+    NSQ->lagPrev           = psEncCtrlC->pitchL[ NB_SUBFR - 1 ];
257
+
258
+    /* Save quantized speech and noise shaping signals: move the upper frame_length samples to the start of each buffer */
259
+    SKP_memcpy( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->frame_length * sizeof( SKP_int16 ) );
260
+    SKP_memcpy( NSQ->sLTP_shp_Q10, &NSQ->sLTP_shp_Q10[ psEncC->frame_length ], psEncC->frame_length * sizeof( SKP_int32 ) );
261
+
262
+}
263
+
264
+/******************************************/
265
+/* Noise shape quantizer for one subframe */
266
+/******************************************/
267
+SKP_INLINE void SKP_Silk_noise_shape_quantizer_del_dec(
268
+        SKP_Silk_nsq_state  *NSQ,                   /* I/O  NSQ state                           */
269
+        NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
270
+        SKP_int             sigtype,                /* I    Signal type                         */
271
+        const SKP_int32     x_Q10[],                /* I                                        */
272
+        SKP_int             q[],                    /* O                                        */
273
+        SKP_int16           xq[],                   /* O                                        */
274
+        SKP_int32           sLTP_Q16[],             /* I/O  LTP filter state                    */
275
+        const SKP_int16     a_Q12[],                /* I    Short term prediction coefs         */
276
+        const SKP_int16     b_Q14[],                /* I    Long term prediction coefs          */
277
+        const SKP_int16     AR_shp_Q13[],           /* I    Noise shaping coefs                 */
278
+        SKP_int             lag,                    /* I    Pitch lag                           */
279
+        SKP_int32           HarmShapeFIRPacked_Q14, /* I                                        */
280
+        SKP_int             Tilt_Q14,               /* I    Spectral tilt                       */
281
+        SKP_int32           LF_shp_Q14,             /* I                                        */
282
+        SKP_int32           Gain_Q16,               /* I                                        */
283
+        SKP_int             Lambda_Q10,             /* I                                        */
284
+        SKP_int             offset_Q10,             /* I                                        */
285
+        SKP_int             length,                 /* I    Input length                        */
286
+        SKP_int             subfr,                  /* I    Subframe number                     */
287
+        SKP_int             shapingLPCOrder,        /* I    Shaping LPC filter order            */
288
+        SKP_int             predictLPCOrder,        /* I    Prediction LPC filter order         */
289
+        SKP_int             nStatesDelayedDecision, /* I    Number of states in decision tree   */
290
+        SKP_int             *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
291
+        SKP_int             decisionDelay           /* I                                        */
292
+)
293
+{
294
+    SKP_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
295
+    SKP_int32   Winner_rand_state;
296
+    SKP_int32   LTP_pred_Q14, LPC_pred_Q10, n_AR_Q10, n_LTP_Q14;
297
+    SKP_int32   n_LF_Q10;
298
+    SKP_int32   r_Q10, rr_Q20, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
299
+    SKP_int32   q1_Q10, q2_Q10;
300
+    SKP_int32   Atmp, dither;
301
+    SKP_int32   exc_Q10, LPC_exc_Q10, xq_Q10;
302
+    SKP_int32   tmp, sLF_AR_shp_Q10;
303
+    SKP_int32   *pred_lag_ptr, *shp_lag_ptr;
304
+    SKP_int32   *psLPC_Q14;
305
+    SKP_int32   a_Q12_tmp[ MAX_LPC_ORDER / 2 ], AR_shp_Q13_tmp[ MAX_LPC_ORDER / 2 ];
306
+    NSQ_sample_struct  psSampleState[ DEL_DEC_STATES_MAX ][ 2 ];
307
+    NSQ_del_dec_struct *psDD;
308
+    NSQ_sample_struct  *psSS;
309
+
310
+    shp_lag_ptr  = &NSQ->sLTP_shp_Q10[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
311
+    pred_lag_ptr = &sLTP_Q16[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
312
+
313
+    /* Preload LPC coeficients to array on stack. Gives small performance gain */
314
+    SKP_memcpy( a_Q12_tmp, a_Q12, predictLPCOrder * sizeof( SKP_int16 ) );
315
+    SKP_memcpy( AR_shp_Q13_tmp, AR_shp_Q13, shapingLPCOrder * sizeof( SKP_int16 ) );
316
+
317
+    for( i = 0; i < length; i++ ) {
318
+        /* Perform common calculations used in all states */
319
+
320
+        /* Long-term prediction */
321
+        if( sigtype == SIG_TYPE_VOICED ) {
322
+            /* Unrolled loop */
323
+            LTP_pred_Q14 = SKP_SMULWB(               pred_lag_ptr[  0 ], b_Q14[ 0 ] );
324
+            LTP_pred_Q14 = SKP_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] );
325
+            LTP_pred_Q14 = SKP_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] );
326
+            LTP_pred_Q14 = SKP_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] );
327
+            LTP_pred_Q14 = SKP_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
328
+            pred_lag_ptr++;
329
+        } else {
330
+            LTP_pred_Q14 = 0;
331
+        }
332
+
333
+        /* Long-term shaping */
334
+        if( lag > 0 ) {
335
+            /* Symmetric, packed FIR coefficients */
336
+            n_LTP_Q14 = SKP_SMULWB( SKP_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
337
+            n_LTP_Q14 = SKP_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ],                     HarmShapeFIRPacked_Q14 );
338
+            n_LTP_Q14 = SKP_LSHIFT( n_LTP_Q14, 6 );
339
+            shp_lag_ptr++;
340
+        } else {
341
+            n_LTP_Q14 = 0;
342
+        }
343
+
344
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
345
+            /* Delayed decision state */
346
+            psDD = &psDelDec[ k ];
347
+
348
+            /* Sample state */
349
+            psSS = psSampleState[ k ];
350
+
351
+            /* Generate dither */
352
+            psDD->Seed = SKP_RAND( psDD->Seed );
353
+
354
+            /* dither = rand_seed < 0 ? 0xFFFFFFFF : 0; */
355
+            dither = SKP_RSHIFT( psDD->Seed, 31 );
356
+
357
+            /* Pointer used in short term prediction and shaping */
358
+            psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
359
+            /* Short-term prediction */
360
+            SKP_assert( ( predictLPCOrder  & 1 ) == 0 );    /* check that order is even */
361
+            SKP_assert( ( (SKP_int64)a_Q12 & 3 ) == 0 );    /* check that array starts at 4-byte aligned address */
362
+            SKP_assert( predictLPCOrder >= 10 );            /* check that unrolling works */
363
+
364
+            /* Partially unrolled */
365
+            Atmp = a_Q12_tmp[ 0 ];          /* read two coefficients at once */
366
+            LPC_pred_Q10 = SKP_SMULWB(               psLPC_Q14[ 0  ], Atmp );
367
+            LPC_pred_Q10 = SKP_SMLAWT( LPC_pred_Q10, psLPC_Q14[ -1 ], Atmp );
368
+            Atmp = a_Q12_tmp[ 1 ];
369
+            LPC_pred_Q10 = SKP_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], Atmp );
370
+            LPC_pred_Q10 = SKP_SMLAWT( LPC_pred_Q10, psLPC_Q14[ -3 ], Atmp );
371
+            Atmp = a_Q12_tmp[ 2 ];
372
+            LPC_pred_Q10 = SKP_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], Atmp );
373
+            LPC_pred_Q10 = SKP_SMLAWT( LPC_pred_Q10, psLPC_Q14[ -5 ], Atmp );
374
+            Atmp = a_Q12_tmp[ 3 ];
375
+            LPC_pred_Q10 = SKP_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], Atmp );
376
+            LPC_pred_Q10 = SKP_SMLAWT( LPC_pred_Q10, psLPC_Q14[ -7 ], Atmp );
377
+            Atmp = a_Q12_tmp[ 4 ];
378
+            LPC_pred_Q10 = SKP_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], Atmp );
379
+            LPC_pred_Q10 = SKP_SMLAWT( LPC_pred_Q10, psLPC_Q14[ -9 ], Atmp );
380
+            for( j = 10; j < predictLPCOrder; j += 2 ) {
381
+                Atmp = a_Q12_tmp[ j >> 1 ]; /* read two coefficients at once */
382
+                LPC_pred_Q10 = SKP_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -j     ], Atmp );
383
+                LPC_pred_Q10 = SKP_SMLAWT( LPC_pred_Q10, psLPC_Q14[ -j - 1 ], Atmp );
384
+            }
385
+
386
+            /* Noise shape feedback */
387
+            SKP_assert( ( shapingLPCOrder       & 1 ) == 0 );   /* check that order is even */
388
+            SKP_assert( ( (SKP_int64)AR_shp_Q13 & 3 ) == 0 );   /* check that array starts at 4-byte aligned address */
389
+            SKP_assert( shapingLPCOrder >= 12 );                /* check that unrolling works */
390
+            /* NOTE: the code below loads two int16 values in an int32, and multiplies each using the   */
391
+            /* SMLAWB and SMLAWT instructions. On a big-endian CPU the two int16 variables would be     */
392
+            /* loaded in reverse order and the code will give the wrong result. In that case swapping   */
393
+            /* the SMLAWB and SMLAWT instructions should solve the problem.                             */
394
+
395
+            /* Partially unrolled */
396
+            Atmp = AR_shp_Q13_tmp[ 0 ];         /* read two coefficients at once */
397
+            n_AR_Q10 = SKP_SMULWB(           psLPC_Q14[ 0  ], Atmp );
398
+            n_AR_Q10 = SKP_SMLAWT( n_AR_Q10, psLPC_Q14[ -1 ], Atmp );
399
+            Atmp = AR_shp_Q13_tmp[ 1 ];
400
+            n_AR_Q10 = SKP_SMLAWB( n_AR_Q10, psLPC_Q14[ -2 ], Atmp );
401
+            n_AR_Q10 = SKP_SMLAWT( n_AR_Q10, psLPC_Q14[ -3 ], Atmp );
402
+            Atmp = AR_shp_Q13_tmp[ 2 ];
403
+            n_AR_Q10 = SKP_SMLAWB( n_AR_Q10, psLPC_Q14[ -4 ], Atmp );
404
+            n_AR_Q10 = SKP_SMLAWT( n_AR_Q10, psLPC_Q14[ -5 ], Atmp );
405
+            Atmp = AR_shp_Q13_tmp[ 3 ];
406
+            n_AR_Q10 = SKP_SMLAWB( n_AR_Q10, psLPC_Q14[ -6 ], Atmp );
407
+            n_AR_Q10 = SKP_SMLAWT( n_AR_Q10, psLPC_Q14[ -7 ], Atmp );
408
+            Atmp = AR_shp_Q13_tmp[ 4 ];
409
+            n_AR_Q10 = SKP_SMLAWB( n_AR_Q10, psLPC_Q14[ -8 ], Atmp );
410
+            n_AR_Q10 = SKP_SMLAWT( n_AR_Q10, psLPC_Q14[ -9 ], Atmp );
411
+            Atmp = AR_shp_Q13_tmp[ 5 ];
412
+            n_AR_Q10 = SKP_SMLAWB( n_AR_Q10, psLPC_Q14[ -10 ], Atmp );
413
+            n_AR_Q10 = SKP_SMLAWT( n_AR_Q10, psLPC_Q14[ -11 ], Atmp );
414
+            for( j = 12; j < shapingLPCOrder; j += 2 ) {
415
+                Atmp = AR_shp_Q13_tmp[ j >> 1 ];        /* read two coefficients at once */
416
+                n_AR_Q10 = SKP_SMLAWB( n_AR_Q10, psLPC_Q14[ -j     ], Atmp );
417
+                n_AR_Q10 = SKP_SMLAWT( n_AR_Q10, psLPC_Q14[ -j - 1 ], Atmp );
418
+            }
419
+            n_AR_Q10 = SKP_RSHIFT( n_AR_Q10, 1 );           /* Q11 -> Q10 */
420
+            n_AR_Q10 = SKP_SMLAWB( n_AR_Q10, psDD->LF_AR_Q12, Tilt_Q14 );
421
+
422
+            n_LF_Q10   = SKP_LSHIFT( SKP_SMULWB( psDD->Shape_Q10[ *smpl_buf_idx ], LF_shp_Q14 ), 2 );
423
+            n_LF_Q10   = SKP_SMLAWT( n_LF_Q10, psDD->LF_AR_Q12, LF_shp_Q14 );
424
+
425
+            /* Input minus prediction plus noise feedback                       */
426
+            /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP  */
427
+            tmp   = SKP_SUB32( LTP_pred_Q14, n_LTP_Q14 );                       /* Add Q14 stuff */
428
+            tmp   = SKP_RSHIFT_ROUND( tmp, 4 );                                 /* round to Q10  */
429
+            tmp   = SKP_ADD32( tmp, LPC_pred_Q10 );                             /* add Q10 stuff */
430
+            tmp   = SKP_SUB32( tmp, n_AR_Q10 );                                 /* subtract Q10 stuff */
431
+            tmp   = SKP_SUB32( tmp, n_LF_Q10 );                                 /* subtract Q10 stuff */
432
+            r_Q10 = SKP_SUB32( x_Q10[ i ], tmp );                               /* residual error Q10 */
433
+
434
+
435
+            /* Flip sign depending on dither */
436
+            r_Q10 = ( r_Q10 ^ dither ) - dither;
437
+            r_Q10 = SKP_SUB32( r_Q10, offset_Q10 );
438
+            r_Q10 = SKP_LIMIT( r_Q10, -64 << 10, 64 << 10 );
439
+
440
+            /* Find two quantization level candidates and measure their rate-distortion */
441
+            if( r_Q10 < -1536 ) {
442
+                q1_Q10  = SKP_LSHIFT( SKP_RSHIFT_ROUND( r_Q10, 10 ), 10 );
443
+                r_Q10   = SKP_SUB32( r_Q10, q1_Q10 );
444
+                rd1_Q10 = SKP_RSHIFT( SKP_SMLABB( SKP_MUL( -SKP_ADD32( q1_Q10, offset_Q10 ), Lambda_Q10 ), r_Q10, r_Q10 ), 10 );
445
+                rd2_Q10 = SKP_ADD32( rd1_Q10, 1024 );
446
+                rd2_Q10 = SKP_SUB32( rd2_Q10, SKP_ADD_LSHIFT32( Lambda_Q10, r_Q10, 1 ) );
447
+                q2_Q10  = SKP_ADD32( q1_Q10, 1024 );
448
+            } else if( r_Q10 > 512 ) {
449
+                q1_Q10  = SKP_LSHIFT( SKP_RSHIFT_ROUND( r_Q10, 10 ), 10 );
450
+                r_Q10   = SKP_SUB32( r_Q10, q1_Q10 );
451
+                rd1_Q10 = SKP_RSHIFT( SKP_SMLABB( SKP_MUL( SKP_ADD32( q1_Q10, offset_Q10 ), Lambda_Q10 ), r_Q10, r_Q10 ), 10 );
452
+                rd2_Q10 = SKP_ADD32( rd1_Q10, 1024 );
453
+                rd2_Q10 = SKP_SUB32( rd2_Q10, SKP_SUB_LSHIFT32( Lambda_Q10, r_Q10, 1 ) );
454
+                q2_Q10  = SKP_SUB32( q1_Q10, 1024 );
455
+            } else {            /* r_Q10 >= -1536 && q1_Q10 <= 512 */
456
+                rr_Q20  = SKP_SMULBB( offset_Q10, Lambda_Q10 );
457
+                rd2_Q10 = SKP_RSHIFT( SKP_SMLABB( rr_Q20, r_Q10, r_Q10 ), 10 );
458
+                rd1_Q10 = SKP_ADD32( rd2_Q10, 1024 );
459
+                rd1_Q10 = SKP_ADD32( rd1_Q10, SKP_SUB_RSHIFT32( SKP_ADD_LSHIFT32( Lambda_Q10, r_Q10, 1 ), rr_Q20, 9 ) );
460
+                q1_Q10  = -1024;
461
+                q2_Q10  = 0;
462
+            }
463
+
464
+            if( rd1_Q10 < rd2_Q10 ) {
465
+                psSS[ 0 ].RD_Q10 = SKP_ADD32( psDD->RD_Q10, rd1_Q10 );
466
+                psSS[ 1 ].RD_Q10 = SKP_ADD32( psDD->RD_Q10, rd2_Q10 );
467
+                psSS[ 0 ].Q_Q10 = q1_Q10;
468
+                psSS[ 1 ].Q_Q10 = q2_Q10;
469
+            } else {
470
+                psSS[ 0 ].RD_Q10 = SKP_ADD32( psDD->RD_Q10, rd2_Q10 );
471
+                psSS[ 1 ].RD_Q10 = SKP_ADD32( psDD->RD_Q10, rd1_Q10 );
472
+                psSS[ 0 ].Q_Q10 = q2_Q10;
473
+                psSS[ 1 ].Q_Q10 = q1_Q10;
474
+            }
475
+
476
+            /* Update states for best quantization */
477
+
478
+            /* Quantized excitation */
479
+            exc_Q10 = SKP_ADD32( offset_Q10, psSS[ 0 ].Q_Q10 );
480
+            exc_Q10 = ( exc_Q10 ^ dither ) - dither;
481
+
482
+            /* Add predictions */
483
+            LPC_exc_Q10 = exc_Q10 + SKP_RSHIFT_ROUND( LTP_pred_Q14, 4 );
484
+            xq_Q10      = SKP_ADD32( LPC_exc_Q10, LPC_pred_Q10 );
485
+
486
+            /* Update states */
487
+            sLF_AR_shp_Q10         = SKP_SUB32(  xq_Q10, n_AR_Q10 );
488
+            psSS[ 0 ].sLTP_shp_Q10 = SKP_SUB32(  sLF_AR_shp_Q10, n_LF_Q10 );
489
+            psSS[ 0 ].LF_AR_Q12    = SKP_LSHIFT( sLF_AR_shp_Q10, 2 );
490
+            psSS[ 0 ].xq_Q14       = SKP_LSHIFT( xq_Q10,         4 );
491
+            psSS[ 0 ].LPC_exc_Q16  = SKP_LSHIFT( LPC_exc_Q10,    6 );
492
+
493
+            /* Update states for second best quantization */
494
+
495
+            /* Quantized excitation */
496
+            exc_Q10 = SKP_ADD32( offset_Q10, psSS[ 1 ].Q_Q10 );
497
+            exc_Q10 = ( exc_Q10 ^ dither ) - dither;
498
+
499
+            /* Add predictions */
500
+            LPC_exc_Q10 = exc_Q10 + SKP_RSHIFT_ROUND( LTP_pred_Q14, 4 );
501
+            xq_Q10      = SKP_ADD32( LPC_exc_Q10, LPC_pred_Q10 );
502
+
503
+            /* Update states */
504
+            sLF_AR_shp_Q10         = SKP_SUB32(  xq_Q10, n_AR_Q10 );
505
+            psSS[ 1 ].sLTP_shp_Q10 = SKP_SUB32(  sLF_AR_shp_Q10, n_LF_Q10 );
506
+            psSS[ 1 ].LF_AR_Q12    = SKP_LSHIFT( sLF_AR_shp_Q10, 2 );
507
+            psSS[ 1 ].xq_Q14       = SKP_LSHIFT( xq_Q10,         4 );
508
+            psSS[ 1 ].LPC_exc_Q16  = SKP_LSHIFT( LPC_exc_Q10,    6 );
509
+        }
510
+
511
+        *smpl_buf_idx  = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;                   /* Index to newest samples              */
512
+        last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK;       /* Index to decisionDelay old samples   */
513
+
514
+        /* Find winner */
515
+        RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
516
+        Winner_ind = 0;
517
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
518
+            if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
519
+                RDmin_Q10   = psSampleState[ k ][ 0 ].RD_Q10;
520
+                Winner_ind = k;
521
+            }
522
+        }
523
+
524
+        /* Increase RD values of expired states */
525
+        Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
526
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
527
+            if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) {
528
+                psSampleState[ k ][ 0 ].RD_Q10 = SKP_ADD32( psSampleState[ k ][ 0 ].RD_Q10, ( SKP_int32_MAX >> 4 ) );
529
+                psSampleState[ k ][ 1 ].RD_Q10 = SKP_ADD32( psSampleState[ k ][ 1 ].RD_Q10, ( SKP_int32_MAX >> 4 ) );
530
+                SKP_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
531
+            }
532
+        }
533
+
534
+        /* Find worst in first set and best in second set */
535
+        RDmax_Q10  = psSampleState[ 0 ][ 0 ].RD_Q10;
536
+        RDmin_Q10  = psSampleState[ 0 ][ 1 ].RD_Q10;
537
+        RDmax_ind = 0;
538
+        RDmin_ind = 0;
539
+        for( k = 1; k < nStatesDelayedDecision; k++ ) {
540
+            /* find worst in first set */
541
+            if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
542
+                RDmax_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
543
+                RDmax_ind = k;
544
+            }
545
+            /* find best in second set */
546
+            if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
547
+                RDmin_Q10  = psSampleState[ k ][ 1 ].RD_Q10;
548
+                RDmin_ind = k;
549
+            }
550
+        }
551
+
552
+        /* Replace a state if best from second set outperforms worst in first set */
553
+        if( RDmin_Q10 < RDmax_Q10 ) {
554
+            SKP_Silk_copy_del_dec_state( &psDelDec[ RDmax_ind ], &psDelDec[ RDmin_ind ], i );
555
+            SKP_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
556
+        }
557
+
558
+        /* Write samples from winner to output and long-term filter states */
559
+        psDD = &psDelDec[ Winner_ind ];
560
+        if( subfr > 0 || i >= decisionDelay ) {
561
+            q[  i - decisionDelay ] = ( SKP_int )SKP_RSHIFT( psDD->Q_Q10[ last_smple_idx ], 10 );
562
+            xq[ i - decisionDelay ] = ( SKP_int16 )SKP_SAT16( SKP_RSHIFT_ROUND(
563
+                    SKP_SMULWW( psDD->Xq_Q10[ last_smple_idx ], psDD->Gain_Q16[ last_smple_idx ] ), 10 ) );
564
+            NSQ->sLTP_shp_Q10[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q10[ last_smple_idx ];
565
+            sLTP_Q16[          NSQ->sLTP_buf_idx     - decisionDelay ] = psDD->Pred_Q16[  last_smple_idx ];
566
+        }
567
+        NSQ->sLTP_shp_buf_idx++;
568
+        NSQ->sLTP_buf_idx++;
569
+
570
+        /* Update states */
571
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
572
+            psDD                                     = &psDelDec[ k ];
573
+            psSS                                     = &psSampleState[ k ][ 0 ];
574
+            psDD->LF_AR_Q12                          = psSS->LF_AR_Q12;
575
+            psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
576
+            psDD->Xq_Q10[    *smpl_buf_idx ]         = SKP_RSHIFT( psSS->xq_Q14, 4 );
577
+            psDD->Q_Q10[     *smpl_buf_idx ]         = psSS->Q_Q10;
578
+            psDD->Pred_Q16[  *smpl_buf_idx ]         = psSS->LPC_exc_Q16;
579
+            psDD->Shape_Q10[ *smpl_buf_idx ]         = psSS->sLTP_shp_Q10;
580
+            psDD->Seed                               = SKP_ADD_RSHIFT32( psDD->Seed, psSS->Q_Q10, 10 );
581
+            psDD->RandState[ *smpl_buf_idx ]         = psDD->Seed;
582
+            psDD->RD_Q10                             = psSS->RD_Q10;
583
+            psDD->Gain_Q16[  *smpl_buf_idx ]         = Gain_Q16;
584
+        }
585
+    }
586
+    /* Update LPC states */
587
+    for( k = 0; k < nStatesDelayedDecision; k++ ) {
588
+        psDD = &psDelDec[ k ];
589
+        SKP_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( SKP_int32 ) );
590
+    }
591
+}
592
+
593
+SKP_INLINE void SKP_Silk_nsq_del_dec_scale_states(
594
+        SKP_Silk_nsq_state  *NSQ,                   /* I/O  NSQ state                           */
595
+        NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
596
+        const SKP_int16     x[],                    /* I    Input in Q0                         */
597
+        SKP_int32           x_sc_Q10[],             /* O    Input scaled with 1/Gain in Q10     */
598
+        SKP_int             length,                 /* I    Length of input                     */
599
+        SKP_int16           sLTP[],                 /* I    Re-whitened LTP state in Q0         */
600
+        SKP_int32           sLTP_Q16[],             /* O    LTP state matching scaled input     */
601
+        SKP_int             subfr,                  /* I    Subframe number                     */
602
+        SKP_int             nStatesDelayedDecision, /* I    Number of del dec states            */
603
+        SKP_int             smpl_buf_idx,           /* I    Index to newest samples in buffers  */
604
+        const SKP_int       LTP_scale_Q14,          /* I    LTP state scaling                   */
605
+        const SKP_int32     Gains_Q16[ NB_SUBFR ],  /* I                                        */
606
+        const SKP_int       pitchL[ NB_SUBFR ]      /* I    Pitch lag                           */
607
+)
608
+{
609
+    SKP_int            i, k, scale_length, lag;
610
+    SKP_int32          inv_gain_Q16, gain_adj_Q16, inv_gain_Q32;
611
+    NSQ_del_dec_struct *psDD;
612
+
613
+    inv_gain_Q16 = SKP_DIV32( SKP_int32_MAX, SKP_RSHIFT( Gains_Q16[ subfr ], 1 ) );
614
+    inv_gain_Q16 = SKP_min( inv_gain_Q16, SKP_int16_MAX );
615
+    lag          = pitchL[ subfr ];
616
+    /* After rewhitening the LTP state is un-scaled. So scale with inv_gain_Q16 */
617
+    if( NSQ->rewhite_flag ) {
618
+        inv_gain_Q32 = SKP_LSHIFT( inv_gain_Q16, 16 );
619
+        if( subfr == 0 ) {
620
+            /* Do LTP downscaling */
621
+            inv_gain_Q32 = SKP_LSHIFT( SKP_SMULWB( inv_gain_Q32, LTP_scale_Q14 ), 2 );
622
+        }
623
+        for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
624
+            SKP_assert( i < MAX_FRAME_LENGTH );
625
+            sLTP_Q16[ i ] = SKP_SMULWB( inv_gain_Q32, sLTP[ i ] );
626
+        }
627
+    }
628
+
629
+    /* Adjust for changing gain */
630
+    if( inv_gain_Q16 != NSQ->prev_inv_gain_Q16 ) {
631
+        gain_adj_Q16 = SKP_DIV32_varQ( inv_gain_Q16, NSQ->prev_inv_gain_Q16, 16 );
632
+
633
+        for( k = 0; k < nStatesDelayedDecision; k++ ) {
634
+            psDD = &psDelDec[ k ];
635
+
636
+            /* Scale scalar states */
637
+            psDD->LF_AR_Q12 = SKP_SMULWW( gain_adj_Q16, psDD->LF_AR_Q12 );
638
+
639
+            /* scale short term state */
640
+            for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
641
+                psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - i - 1 ] = SKP_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - i - 1 ] );
642
+            }
643
+            for( i = 0; i < DECISION_DELAY; i++ ) {
644
+                psDD->Pred_Q16[  i ] = SKP_SMULWW( gain_adj_Q16, psDD->Pred_Q16[  i ] );
645
+                psDD->Shape_Q10[ i ] = SKP_SMULWW( gain_adj_Q16, psDD->Shape_Q10[ i ] );
646
+            }
647
+        }
648
+
649
+        /* Scale long term shaping state */
650
+
651
+        /* Calculate length to be scaled, Worst case: Next frame is voiced with max lag */
652
+        scale_length = length * NB_SUBFR;                                               /* aprox max lag */
653
+        scale_length = scale_length - SKP_SMULBB( NB_SUBFR - ( subfr + 1 ), length );   /* subtract samples that will be too old in next frame */
654
+        scale_length = SKP_max_int( scale_length, lag + LTP_ORDER );                    /* make sure to scale whole pitch period if voiced */
655
+
656
+        for( i = NSQ->sLTP_shp_buf_idx - scale_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
657
+            NSQ->sLTP_shp_Q10[ i ] = SKP_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q10[ i ] );
658
+        }
659
+
660
+        /* Scale LTP predict state */
661
+        if( NSQ->rewhite_flag == 0 ) {
662
+            for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
663
+                sLTP_Q16[ i ] = SKP_SMULWW( gain_adj_Q16, sLTP_Q16[ i ] );
664
+            }
665
+        }
666
+    }
667
+
668
+    /* Scale input */
669
+    for( i = 0; i < length; i++ ) {
670
+        x_sc_Q10[ i ] = SKP_RSHIFT( SKP_SMULBB( x[ i ], ( SKP_int16 )inv_gain_Q16 ), 6 );
671
+    }
672
+
673
+    /* save inv_gain */
674
+    SKP_assert( inv_gain_Q16 != 0 );
675
+    NSQ->prev_inv_gain_Q16 = inv_gain_Q16;
676
+}
677
+
678
+SKP_INLINE void SKP_Silk_copy_del_dec_state(
679
+        NSQ_del_dec_struct  *DD_dst,                /* I    Dst del dec state                   */
680
+        NSQ_del_dec_struct  *DD_src,                /* I    Src del dec state                   */
681
+        SKP_int             LPC_state_idx           /* I    Index to LPC buffer                 */
682
+)
683
+{
684
+    SKP_memcpy( DD_dst->RandState, DD_src->RandState,   DECISION_DELAY * sizeof( SKP_int   ) );
685
+    SKP_memcpy( DD_dst->Q_Q10,     DD_src->Q_Q10,       DECISION_DELAY * sizeof( SKP_int32 ) );
686
+    SKP_memcpy( DD_dst->Pred_Q16,  DD_src->Pred_Q16,    DECISION_DELAY * sizeof( SKP_int32 ) );
687
+    SKP_memcpy( DD_dst->Shape_Q10, DD_src->Shape_Q10,   DECISION_DELAY * sizeof( SKP_int32 ) );
688
+    SKP_memcpy( DD_dst->Xq_Q10,    DD_src->Xq_Q10,      DECISION_DELAY * sizeof( SKP_int32 ) );
689
+
690
+    SKP_memcpy( &DD_dst->sLPC_Q14[ LPC_state_idx ], &DD_src->sLPC_Q14[ LPC_state_idx ], NSQ_LPC_BUF_LENGTH * sizeof( SKP_int32 ) );
691
+    DD_dst->LF_AR_Q12 = DD_src->LF_AR_Q12;
692
+    DD_dst->Seed      = DD_src->Seed;
693
+    DD_dst->SeedInit  = DD_src->SeedInit;
694
+    DD_dst->RD_Q10    = DD_src->RD_Q10;
695
+}