Radix-22 algorithm has the same multiplicative complexity as radix-4 algorithm, but retains the butterfly structure of radix-2 algorithm
基数-2^2算法具有与基数-4算法相同的乘法复杂度,但保留了基数-2算法的蝴蝶结构
Shousheng He and M. Torkelson, "A new approach to pipeline FFT processor," Proceedings of International Conference on Parallel Processing, Honolulu, HI, USA, 1996, pp. 766-770, doi: 10.1109/IPPS.1996.508145.
The butterfly output Z1(n) is sent to apply the twiddle factor, and Z1(n + N/2) is sent back to the shift registers to be “multiplied” in still next N/2 cycles when the first half of the next frame of time sequence is loaded in.
After N-1 clock cycles, The complete DFT transform result streams out to the right, in bit-reversed order. The next frame of transform can be computed without pausing due to the pipelined processing of each stages.
In practical implementation, pipeline register should be inserted between each multiplier and butterfly stage to improve the performance. Shimming registers are also needed for control signals to comply with thus revised timing. The latency of the output is then increased to N−1+3(log4N−1)without affecting the throughput rate.
The Xilinx® LogiCORE™ IP Fast Fourier Transform (FFT) core has a bit-accurate C model designed for system modeling. A MATLAB® software MEX function for seamless MATLAB software integration is also available.
`timescale 1ns / 1psmodule fft_tb; // Testbench signals reg Clk; reg [7:0] s_axis_config_tdata; reg s_axis_config_tvalid; wire s_axis_config_tready; reg [31:0] s_axis_data_tdata; reg s_axis_data_tvalid; wire s_axis_data_tready; reg s_axis_data_tlast; wire [47:0] m_axis_data_tdata; wire m_axis_data_tvalid; reg m_axis_data_tready; wire m_axis_data_tlast; wire event_frame_started; wire event_tlast_unexpected; wire event_tlast_missing; wire event_status_channel_halt; wire event_data_in_channel_halt; wire event_data_out_channel_halt; // File handlers integer file, output_file_fft, output_file_ifft, i; reg signed [15:0] real_part, imag_part; // 16-bit signed for real and imaginary parts reg signed [15:0] signed_QAMDataRe, signed_QAMDataIm; reg [5:0] FrameCnt; reg [10:0]cnt; // State to control the FFT and IFFT operations reg [2:0] stage; // // Instantiate the FFT module xfft_0 your_instance_name ( .aclk(Clk), // input wire aclk .s_axis_config_tdata(s_axis_config_tdata), // input wire [7 : 0] s_axis_config_tdata .s_axis_config_tvalid(s_axis_config_tvalid), // input wire s_axis_config_tvalid .s_axis_config_tready(s_axis_config_tready), // output wire s_axis_config_tready .s_axis_data_tdata(s_axis_data_tdata), // input wire [31 : 0] s_axis_data_tdata .s_axis_data_tvalid(s_axis_data_tvalid), // input wire s_axis_data_tvalid .s_axis_data_tready(s_axis_data_tready), // output wire s_axis_data_tready .s_axis_data_tlast(s_axis_data_tlast), // input wire s_axis_data_tlast .m_axis_data_tdata(m_axis_data_tdata), // output wire [47 : 0] m_axis_data_tdata .m_axis_data_tvalid(m_axis_data_tvalid), // output wire m_axis_data_tvalid .m_axis_data_tready(m_axis_data_tready), // input wire m_axis_data_tready .m_axis_data_tlast(m_axis_data_tlast), // output wire m_axis_data_tlast .event_frame_started(event_frame_started), // output wire event_frame_started .event_tlast_unexpected(event_tlast_unexpected), // output wire event_tlast_unexpected .event_tlast_missing(event_tlast_missing), // output wire event_tlast_missing .event_status_channel_halt(event_status_channel_halt), // output wire event_status_channel_halt .event_data_in_channel_halt(event_data_in_channel_halt), // output wire event_data_in_channel_halt .event_data_out_channel_halt(event_data_out_channel_halt) // output wire event_data_out_channel_halt ); // Clock generation always #5 Clk = ~Clk; // 100MHz clock// 定义一个64个元素的数组,每个元素是一个2位宽的字段,存储实部和虚部 reg [31:0] QAMData [0:63]; // 每个元素存储32位的数据:[signed_QAMDataIm, signed_QAMDataRe] reg [6:0] addr; // 地址计数器 initial begin Clk = 0; s_axis_config_tdata = 8'b0; // Default configuration s_axis_config_tvalid = 1'b0; s_axis_data_tvalid = 1'b0; s_axis_data_tlast = 1'b0; m_axis_data_tready = 1'b1; // Assume always ready to receive data stage = 0; FrameCnt = 0; cnt=0; // Open the FFT output data file file = $fopen("E:/FPGA_project/TCL_try240520/OTFS2/sim_result/input_data.txt", "r"); if (file == 0) begin $display("Error opening data file."); $finish; end // 读取文件并将数据存储到数组中 for (i = 0; i < 64; i = i + 1) begin if ($fscanf(file, "%d %d\n", real_part, imag_part) != 2) begin $display("Error reading file at line %d", i); $finish; end // 将读取的实部和虚部合并成一个16位的数据(假设每个部分为8位,符号扩展即可) QAMData[i] = {imag_part, real_part}; end $fclose(file); // Open the FFT output data file output_file_fft = $fopen("E:/FPGA_project/TCL_try240520/OTFS2/sim_result/output_data.txt", "w"); if (output_file_fft == 0) begin $display("Error opening FFT output file."); $finish; end // Open the IFFT output data file output_file_ifft =$fopen("E:/FPGA_project/TCL_try240520/OTFS2/sim_result/output_data_IFFT.txt", "w"); if (output_file_ifft == 0) begin $display("Error opening IFFT output file."); $finish; end s_axis_data_tlast = 1'b0; end // Handle output for IFFT, FFT, and IFFT->FFT sequence always @(posedge Clk) begin if (m_axis_data_tvalid) begin if (FrameCnt == 0) begin $display("FFT Output: Real = %d, Imag = %d", $signed(m_axis_data_tdata[18:0]), $signed(m_axis_data_tdata[42:24])); $fwrite(output_file_fft, "%d %d\n", $signed(m_axis_data_tdata[18:0]), $signed(m_axis_data_tdata[42:24])); end else if (FrameCnt == 1) begin $display("IFFT Output: Real = %d, Imag = %d", $signed(m_axis_data_tdata[18:0]), $signed(m_axis_data_tdata[42:24])); $fwrite(output_file_ifft, "%d %d\n", $signed(m_axis_data_tdata[18:0]), $signed(m_axis_data_tdata[42:24])); end end end // 时序逻辑见 pdf Figure 3-44 // stage 0 第一个的计算-配置输入 FFT // stage 1 配置 第一个计算-数据输入 // stage 2 第二个的计算-配置输入 // stage 3 配置 第二个计算-数据输入 always @(posedge Clk) begin if (s_axis_data_tready) begin case (stage) 0: begin stage <= 1; // Start with FFT configuration // --- s_axis_config_tdata(0) <= '1'; -- forward // --- s_axis_config_tdata(0) <= '0'; -- inverse s_axis_config_tdata <= 8'b00000001; // Forward FFT s_axis_config_tvalid <= 1'b1; s_axis_data_tvalid <= 1'b0; addr<=0; end 1: begin s_axis_config_tvalid <= 1'b0; if (addr < 64) begin // 从QAMData数组中读取数据,并设置传输信号 s_axis_data_tdata = QAMData[addr]; s_axis_data_tvalid <= 1'b1; s_axis_data_tlast <= (addr == 63) ? 1'b1 : 1'b0; // 最后一个数据的时钟周期设置tlast addr <= addr + 1; // 增加地址计数器 end else begin stage <= 2; end end 2: begin cnt<=cnt+1; stage <= 3; s_axis_config_tdata <= 8'b00000000; // Inverse FFT s_axis_config_tvalid <= 1'b1; s_axis_data_tvalid = 1'b0; addr<=0; end 3: begin s_axis_config_tvalid <= 1'b0; if (addr < 64) begin // 从QAMData数组中读取数据,并设置传输信号 s_axis_data_tdata = QAMData[addr]; s_axis_data_tvalid <= 1'b1; s_axis_data_tlast <= (addr == 63) ? 1'b1 : 1'b0; // 最后一个数据的时钟周期设置tlast addr <= addr + 1; // 增加地址计数器 end else begin stage <= 4; cnt<=cnt+1; end end 4: begin addr<=0; end default: begin stage <= 0; end endcase end end // Control the simulation stages and restart FFT with IFFT output always @(posedge Clk) begin if (m_axis_data_tvalid && m_axis_data_tlast) begin FrameCnt <= FrameCnt + 1; if (FrameCnt == 0) begin // FFT stage complete $display("FFT Complete. Starting IFFT stage."); $fclose(output_file_fft); end else if (FrameCnt == 1) begin // IFFT stage complete, end simulation $display("IFFT Complete. Ending testbench."); $fclose(output_file_ifft); $finish; end end endendmodule
02:xfft_0 ip配置
03. Matlab程序
%-------------------------------------------------------------------% 改动自% Example code for FFT v8.0 MEX function% $RCSfile: run_xfft_v9_1_mex.m,v $ $Version: $ $Date: 2010/09/08 12:33:21 $%%-------------------------------------------------------------------clear all;clc;close all;generics.C_NFFT_MAX=6;% 64点IFFTgenerics.C_ARCH=2;% Architecturegenerics.C_HAS_NFFT=0;% No Run time configurable transform lengthgenerics.C_USE_FLT_PT=0;% Fixed-Pointgenerics.C_INPUT_WIDTH=12; %Input data width (bits)generics.C_TWIDDLE_WIDTH=12; % Phase factor width (bits)generics.C_HAS_SCALING=0; % Scaling option: unscaledgenerics.C_HAS_BFP=0; % Scaling option: Applicable if C_HAS_SCALING=1generics.C_HAS_ROUNDING=0; % Rounding:0 = truncationchannels = 1;samples = 2^generics.C_NFFT_MAX;L = 16;LFSR = ones(L,1);Pol = zeros(L,1);Pol(11) = 1;Pol(13) = 1;Pol(14) = 1;Pol(16) = 1;x = zeros(2^L, 1);% backward LSFRfor k = 1:2^L temp = mod(LFSR' * Pol, 2); LFSR(2:L) = LFSR(1:L-1); LFSR(1) = temp; x(k) = LFSR(1);end% QAM modulationyxxx = qammod(x, 4, 'gray', 'InputType', 'bit', 'UnitAveragePower', true);% % Quantize the datadata_send = fixfun_floor_v2(yxxx(1:64), 0, 11); % Quantization% % Write the input data to a text file% % Open the file to write data% fileID = fopen('Sim_data/input_data.txt', 'w');%% % Loop through each complex value in data_send% for i = 1:length(data_send)% % Get the real and imaginary parts of the complex value% real_part = real(data_send(i));% imag_part = imag(data_send(i));%% % Quantize both real and imaginary parts with 11 fractional bits% quantized_real = round(real_part * 2^11); % 11 bits for fractional part% quantized_imag = round(imag_part * 2^11); % 11 bits for fractional part%% % Clamp to signed 12-bit range (1 bit for sign, 11 bits for fraction)% quantized_real = max(min(quantized_real, 2^11-1), -2^11); % [-2048, 2047]% quantized_imag = max(min(quantized_imag, 2^11-1), -2^11); % [-2048, 2047]%% % Write real and imaginary parts to the file as signed 12-bit integers% fprintf(fileID, '%d %d\n', quantized_real, quantized_imag);% end%% % Close the file% fclose(fileID);% Handle multichannel FFTs if required% channel 等于 1% Create input data frame: constant data% constant_input = 0.5 + 0.5j;% input_raw(1:samples) = constant_input;input_raw(1:samples)=data_send(1:samples);if generics.C_USE_FLT_PT == 0 % Set up quantizer for correct twos's complement, fixed-point format: one sign bit, C_INPUT_WIDTH-1 fractional bits q = quantizer([generics.C_INPUT_WIDTH, generics.C_INPUT_WIDTH-1], 'fixed', 'convergent', 'saturate'); % Format data for fixed-point input input = quantize(q,input_raw);else % Floating point interface - use data directly input = input_raw;end% Set point size for this transformnfft = generics.C_NFFT_MAX;% Set up scaling schedule: scaling_sch[1] is the scaling for the first stage% Scaling schedule to 1/N:% 2 in each stage for Radix-4/Pipelined, Streaming I/O% 1 in each stage for Radix-2/Radix-2 Liteif generics.C_ARCH == 1 || generics.C_ARCH == 3 scaling_sch = ones(1,floor(nfft/2)) * 2; if mod(nfft,2) == 1 scaling_sch = [scaling_sch 1]; endelse scaling_sch = ones(1,nfft);endif channels > 1 fprintf('Running the MEX function for channel %d...\n',channel)else fprintf('Running the MEX function...\n')end% Run the MEX function% Set FFT (1)[output, blkexp, overflow] = xfft_v9_1_bitacc_mex(generics, nfft, input, scaling_sch, 1);%or IFFT (0)[output_ifft, blkexp_ifft, overflow_ifft] = xfft_v9_1_bitacc_mex(generics, nfft, input, scaling_sch, 0);Xfft_outputdata = ImportOTFSData('E:\FPGA_project\TCL_try240520\OTFS2\sim_result\output_data.txt');Xfft_Result = complex(Xfft_outputdata(:,1), Xfft_outputdata(:,2));output_lianghua_fft= reshape(output*2^11,samples,1);Xfft_outputdata_ifft = ImportOTFSData('E:\FPGA_project\TCL_try240520\OTFS2\sim_result\output_data_IFFT.txt');Xfft_Result_ifft= complex(Xfft_outputdata_ifft(:,1), Xfft_outputdata_ifft(:,2));output_lianghua_ifft= reshape(output_ifft*2^11,samples,1);if isequal(output_lianghua_fft,Xfft_Result) disp("FFT结果一致");endif isequal(output_lianghua_ifft,Xfft_Result_ifft) disp("IFFT结果一致");end