OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_transform.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_transform.cpp
34// Author: Aous Naman
35// Date: 28 August 2019
36//***************************************************************************/
37
38#include <cstdio>
39#include <mutex>
40
41#include "ojph_arch.h"
42#include "ojph_mem.h"
43#include "ojph_transform.h"
45#include "ojph_params.h"
47
48namespace ojph {
49
50 // defined elsewhere
51 class line_buf;
52
53 namespace local {
54
56 // Reversible functions
58
61 (const lifting_step* s, const line_buf* sig, const line_buf* other,
62 const line_buf* aug, ui32 repeat, bool synthesis) = NULL;
63
66 (const param_atk* atk, const line_buf* ldst, const line_buf* hdst,
67 const line_buf* src, ui32 width, bool even) = NULL;
68
71 (const param_atk* atk, const line_buf* dst, const line_buf* lsrc,
72 const line_buf* hsrc, ui32 width, bool even) = NULL;
73
75 // Irreversible functions
77
80 (const lifting_step* s, const line_buf* sig, const line_buf* other,
81 const line_buf* aug, ui32 repeat, bool synthesis) = NULL;
82
85 (float K, const line_buf* aug, ui32 repeat) = NULL;
86
89 (const param_atk* atk, const line_buf* ldst, const line_buf* hdst,
90 const line_buf* src, ui32 width, bool even) = NULL;
91
94 (const param_atk* atk, const line_buf* dst, const line_buf* lsrc,
95 const line_buf* hsrc, ui32 width, bool even) = NULL;
96
99 {
100 static std::once_flag wavelet_transform_functions_init_flag;
101 std::call_once(wavelet_transform_functions_init_flag, [](){
102#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
103
107
112
113 #ifndef OJPH_DISABLE_SIMD
114
115 #if (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
116
117 #ifndef OJPH_DISABLE_SSE
119 {
124 }
125 #endif // !OJPH_DISABLE_SSE
126
127 #ifndef OJPH_DISABLE_SSE2
129 {
133 }
134 #endif // !OJPH_DISABLE_SSE2
135
136 #ifndef OJPH_DISABLE_AVX
138 {
143 }
144 #endif // !OJPH_DISABLE_AVX
145
146 #ifndef OJPH_DISABLE_AVX2
148 {
152 }
153 #endif // !OJPH_DISABLE_AVX2
154
155 #if (defined(OJPH_ARCH_X86_64) && !defined(OJPH_DISABLE_AVX512))
157 {
158 // rev_vert_step = avx512_rev_vert_step;
159 // rev_horz_ana = avx512_rev_horz_ana;
160 // rev_horz_syn = avx512_rev_horz_syn;
161
166 }
167 #endif // !OJPH_DISABLE_AVX512
168
169 #elif defined(OJPH_ARCH_ARM)
170
171 #endif // !(defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
172
173 #endif // !OJPH_DISABLE_SIMD
174
175#else // OJPH_ENABLE_WASM_SIMD
179
184#endif // !OJPH_ENABLE_WASM_SIMD
185 });
186 }
187
189
190#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
191
193 static
194 void gen_rev_vert_step32(const lifting_step* s, const line_buf* sig,
195 const line_buf* other, const line_buf* aug,
196 ui32 repeat, bool synthesis)
197 {
198 const si32 a = s->rev.Aatk;
199 const si32 b = s->rev.Batk;
200 const ui8 e = s->rev.Eatk;
201
202 si32* dst = aug->i32;
203 const si32* src1 = sig->i32, * src2 = other->i32;
204 // The general definition of the wavelet in Part 2 is slightly
205 // different to part 2, although they are mathematically equivalent
206 // here, we identify the simpler form from Part 1 and employ them
207 if (a == 1)
208 { // 5/3 update and any case with a == 1
209 if (synthesis)
210 for (ui32 i = repeat; i > 0; --i)
211 *dst++ -= (b + *src1++ + *src2++) >> e;
212 else
213 for (ui32 i = repeat; i > 0; --i)
214 *dst++ += (b + *src1++ + *src2++) >> e;
215 }
216 else if (a == -1 && b == 1 && e == 1)
217 { // 5/3 predict
218 if (synthesis)
219 for (ui32 i = repeat; i > 0; --i)
220 *dst++ += (*src1++ + *src2++) >> e;
221 else
222 for (ui32 i = repeat; i > 0; --i)
223 *dst++ -= (*src1++ + *src2++) >> e;
224 }
225 else if (a == -1)
226 { // any case with a == -1, which is not 5/3 predict
227 if (synthesis)
228 for (ui32 i = repeat; i > 0; --i)
229 *dst++ -= (b - (*src1++ + *src2++)) >> e;
230 else
231 for (ui32 i = repeat; i > 0; --i)
232 *dst++ += (b - (*src1++ + *src2++)) >> e;
233 }
234 else { // general case
235 if (synthesis)
236 for (ui32 i = repeat; i > 0; --i)
237 *dst++ -= (b + a * (*src1++ + *src2++)) >> e;
238 else
239 for (ui32 i = repeat; i > 0; --i)
240 *dst++ += (b + a * (*src1++ + *src2++)) >> e;
241 }
242 }
243
245 static
246 void gen_rev_vert_step64(const lifting_step* s, const line_buf* sig,
247 const line_buf* other, const line_buf* aug,
248 ui32 repeat, bool synthesis)
249 {
250 const si64 a = s->rev.Aatk;
251 const si64 b = s->rev.Batk;
252 const ui8 e = s->rev.Eatk;
253
254 si64* dst = aug->i64;
255 const si64* src1 = sig->i64, * src2 = other->i64;
256 // The general definition of the wavelet in Part 2 is slightly
257 // different to part 2, although they are mathematically equivalent
258 // here, we identify the simpler form from Part 1 and employ them
259 if (a == 1)
260 { // 5/3 update and any case with a == 1
261 if (synthesis)
262 for (ui32 i = repeat; i > 0; --i)
263 *dst++ -= (b + *src1++ + *src2++) >> e;
264 else
265 for (ui32 i = repeat; i > 0; --i)
266 *dst++ += (b + *src1++ + *src2++) >> e;
267 }
268 else if (a == -1 && b == 1 && e == 1)
269 { // 5/3 predict
270 if (synthesis)
271 for (ui32 i = repeat; i > 0; --i)
272 *dst++ += (*src1++ + *src2++) >> e;
273 else
274 for (ui32 i = repeat; i > 0; --i)
275 *dst++ -= (*src1++ + *src2++) >> e;
276 }
277 else if (a == -1)
278 { // any case with a == -1, which is not 5/3 predict
279 if (synthesis)
280 for (ui32 i = repeat; i > 0; --i)
281 *dst++ -= (b - (*src1++ + *src2++)) >> e;
282 else
283 for (ui32 i = repeat; i > 0; --i)
284 *dst++ += (b - (*src1++ + *src2++)) >> e;
285 }
286 else { // general case
287 if (synthesis)
288 for (ui32 i = repeat; i > 0; --i)
289 *dst++ -= (b + a * (*src1++ + *src2++)) >> e;
290 else
291 for (ui32 i = repeat; i > 0; --i)
292 *dst++ += (b + a * (*src1++ + *src2++)) >> e;
293 }
294 }
295
297 void gen_rev_vert_step(const lifting_step* s, const line_buf* sig,
298 const line_buf* other, const line_buf* aug,
299 ui32 repeat, bool synthesis)
300 {
301 if (((sig != NULL) && (sig->flags & line_buf::LFT_32BIT)) ||
302 ((aug != NULL) && (aug->flags & line_buf::LFT_32BIT)) ||
303 ((other != NULL) && (other->flags & line_buf::LFT_32BIT)))
304 {
305 assert((sig == NULL || sig->flags & line_buf::LFT_32BIT) &&
306 (other == NULL || other->flags & line_buf::LFT_32BIT) &&
307 (aug == NULL || aug->flags & line_buf::LFT_32BIT));
308 gen_rev_vert_step32(s, sig, other, aug, repeat, synthesis);
309 }
310 else
311 {
312 assert((sig == NULL || sig->flags & line_buf::LFT_64BIT) &&
313 (other == NULL || other->flags & line_buf::LFT_64BIT) &&
314 (aug == NULL || aug->flags & line_buf::LFT_64BIT));
315 gen_rev_vert_step64(s, sig, other, aug, repeat, synthesis);
316 }
317 }
318
320 static
321 void gen_rev_horz_ana32(const param_atk* atk, const line_buf* ldst,
322 const line_buf* hdst, const line_buf* src,
323 ui32 width, bool even)
324 {
325 if (width > 1)
326 {
327 // combine both lsrc and hsrc into dst
328 si32* dph = hdst->i32;
329 si32* dpl = ldst->i32;
330 si32* sp = src->i32;
331 ui32 w = width;
332 if (!even)
333 {
334 *dph++ = *sp++; --w;
335 }
336 for (; w > 1; w -= 2)
337 {
338 *dpl++ = *sp++; *dph++ = *sp++;
339 }
340 if (w)
341 {
342 *dpl++ = *sp++; --w;
343 }
344
345 si32* hp = hdst->i32, * lp = ldst->i32;
346 ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass
347 ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass
348 ui32 num_steps = atk->get_num_steps();
349 for (ui32 j = num_steps; j > 0; --j)
350 {
351 // first lifting step
352 const lifting_step* s = atk->get_step(j - 1);
353 const si32 a = s->rev.Aatk;
354 const si32 b = s->rev.Batk;
355 const ui8 e = s->rev.