config_placement.f90

!*****************************************************************************
! Copyright(C) 2011-2013 Intel Corporation. All Rights Reserved.
!
! The source code, information  and  material ("Material") contained herein is
! owned  by Intel Corporation or its suppliers or licensors, and title to such
! Material remains  with Intel Corporation  or its suppliers or licensors. The
! Material  contains proprietary information  of  Intel or  its  suppliers and
! licensors. The  Material is protected by worldwide copyright laws and treaty
! provisions. No  part  of  the  Material  may  be  used,  copied, reproduced,
! modified, published, uploaded, posted, transmitted, distributed or disclosed
! in any way  without Intel's  prior  express written  permission. No  license
! under  any patent, copyright  or  other intellectual property rights  in the
! Material  is  granted  to  or  conferred  upon  you,  either  expressly,  by
! implication, inducement,  estoppel or  otherwise.  Any  license  under  such
! intellectual  property  rights must  be express  and  approved  by  Intel in
! writing.
!
! *Third Party trademarks are the property of their respective owners.
!
! Unless otherwise  agreed  by Intel  in writing, you may not remove  or alter
! this  notice or  any other notice embedded  in Materials by Intel or Intel's
! suppliers or licensors in any way.
!
!*****************************************************************************
! Content:
! An example of using MKL DFTI configuration parameter DFTI_PLACEMENT.
! The parameter defines if the result overwrites the input data or not.
!
! Values:
!   DFTI_INPLACE (default) - result overwrites input data
!   DFTI_NOT_INPLACE       - result is placed in a separate array
!
! Note 1: In many cases in-place transforms need storage association of input
!         and output. When input and output have different type (e.g. real
!         transforms), the data can be storage associated by using Cray
!         pointers extension of Fortran.
!
! Note 2: When storage data types of forward and backward domains are
!         the same, the configuration parameters for the layout of input are
!         also used for the layout of output (e.g. output strides are
!         ignored). Otherwise, both input and output layout shall be defined
!         (for example real transform with conjugate even storage
!         set to DFTI_COMPLEX_COMPLEX).
!
! This example computes an in-place and an out-of-place 2D real transform.
!
!*****************************************************************************

program config_placement

  use MKL_DFTI, forget => DFTI_DOUBLE, DFTI_DOUBLE => DFTI_DOUBLE_R

  ! Sizes of 2D transform
  integer, parameter :: N1 = 100
  integer, parameter :: N2 = 200

  ! Arbitrary harmonic used to verify FFT
  integer, parameter :: H1 = 1
  integer, parameter :: H2 = 2

  ! Need double precision
  integer, parameter :: WP = selected_real_kind(15,307)

  ! Execution status
  integer :: status = 0, ignored_status

  ! Data arrays
  real(WP), allocatable :: xr(:,:)
  complex(WP), allocatable :: xc(:,:)

  ! DFTI descriptor handle
  type(DFTI_DESCRIPTOR), POINTER :: hand

  ! Strides define data layout for real and complex domains
  integer cstrides(3), rstrides(3)

  hand => null()

  print *,"Example config_placement"
  print *,"In-place and out-of-place 2D real FFT"
  print *,"Configuration parameters:"
  print '(" DFTI_FORWARD_DOMAIN = DFTI_REAL")'
  print '(" DFTI_PRECISION      = DFTI_DOUBLE")'
  print '(" DFTI_DIMENSION      = 2")'
  print '(" DFTI_LENGTHS        = /"I0","I0"/" )', N1, N2

  print *,"======= In-place 2D real FFT ======="

  print *,"Allocate data array"
  allocate ( xr(2*(N1/2+1), N2) )

  print *,"Create DFTI descriptor"
  status = DftiCreateDescriptor(hand, DFTI_DOUBLE, DFTI_REAL, 2, [N1,N2])
  if (0 /= status) goto 999

  print *,"Set CCE storage"
  status = DftiSetValue(hand, DFTI_CONJUGATE_EVEN_STORAGE, &
    DFTI_COMPLEX_COMPLEX)
  if (0 /= status) goto 999

  rstrides = [0, 1, size(xr,dim=1)   ]
  cstrides = [0, 1, size(xr,dim=1)/2 ]

  print '(" Set input  strides = "3(I0:", "))', rstrides
  status = DftiSetValue(hand, DFTI_INPUT_STRIDES, rstrides)
  if (0 /= status) goto 999

  ! This must be done because type of elements in forward
  ! and backward domain is different
  print '(" Set output strides = "3(I0:", "))', cstrides
  status = DftiSetValue(hand, DFTI_OUTPUT_STRIDES, cstrides)
  if (0 /= status) goto 999

  print *,"Commit DFTI descriptor"
  status = DftiCommitDescriptor(hand)
  if (0 /= status) goto 999

  print *,"Initialize data for real-to-complex FFT"
  call init_r(xr, N1, N2, H1, H2)

  print *,"Compute forward in-place transform"
  status = DftiComputeForward(hand, xr(:,1))
  if (0 /= status) goto 999

  print *,"Verify the result by viewing xr as a complex array"
  status = verify_r_as_c(xr, N1, N2, H1, H2)
  if (0 /= status) goto 999

  print *,"======= Reconfigure for out-on-place 2D real FFT ======="

  print *,"Allocate data array for output"
  allocate ( xc(N1/2+1, N2) )

  print *,"Set out-of-place"
  status = DftiSetValue(hand, DFTI_PLACEMENT, DFTI_NOT_INPLACE)
  if (0 /= status) goto 999

  rstrides = [0, 1, size(xr,dim=1) ]
  cstrides = [0, 1, size(xc,dim=1) ]

  print '(" Set input  strides = "3(I0:", "))', rstrides
  status = DftiSetValue(hand, DFTI_INPUT_STRIDES, rstrides)
  if (0 /= status) goto 999

  print '(" Set output strides = "3(I0:", "))', cstrides
  status = DftiSetValue(hand, DFTI_OUTPUT_STRIDES, cstrides)
  if (0 /= status) goto 999

  print *,"Commit DFTI descriptor"
  status = DftiCommitDescriptor(hand)
  if (0 /= status) goto 999

  print *,"Initialize data for real-to-complex FFT"
  call init_r(xr, N1, N2, H1, H2)

  print *,"Compute forward out-of-place transform"
  status = DftiComputeForward(hand, xr(:,1), xc(:,1) )
  if (0 /= status) goto 999

  print *,"Verify the result"
  status = verify_c(xc, N1, N2, H1, H2)
  if (0 /= status) goto 999

100 continue

  print *,"Release the DFTI descriptor"
  ignored_status = DftiFreeDescriptor(hand)

  if (allocated(xr) .or. allocated(xc)) then
    print *,"Deallocate data arrays"
  endif
  if (allocated(xr))  deallocate(xr)
  if (allocated(xc))  deallocate(xc)

  if (status == 0) then
    print *, "TEST PASSED"
    call exit(0)
  else
    print *, "TEST FAILED"
    call exit(1)
  end if

999 print '("  Error, status = ",I0)', status
  goto 100

contains

  ! Compute mod(K*L,M) accurately
  pure real(WP) function moda(k,l,m)
    integer, intent(in) :: k,l,m
    integer*8 :: k8
    k8 = k
    moda = real(mod(k8*l,m),WP)
  end function moda

  ! Initialize x(:,:) to harmonic H
  subroutine init_r(x, N1, N2, H1, H2)
    integer N1, N2, H1, H2
    real(WP) :: x(:,:)

    integer k1, k2
    real(WP), parameter:: TWOPI = 6.2831853071795864769_WP
    real(WP) :: factor

    factor = 2
    if (mod(H1,N1)==0 .and. mod(H2,N2)==0) factor = 1
    forall (k1=1:N1, k2=1:N2)
      x(k1,k2) = factor * cos(TWOPI*( moda(H1,k1-1,N1) / N1   &
        +                             moda(H2,k2-1,N2) / N2)) / (N1*N2)
    end forall
  end subroutine init_r

  ! Verify that r(:,:) has unit peak at (H1,H2)
  integer function verify_r_as_c(r, N1, N2, H1, H2)
    integer N1, N2, H1, H2
    real(WP) :: r(:,:)

    ! Use Cray pointer to associate complex view with the real array r.
    ! The first dimension of r must be divisible by 2.
    complex(WP) :: c( 1:size(r,dim=1)/2, 1:size(r,dim=2) )
    pointer (p,c)

    ! Associate complex view with r
    p = loc(r)

    verify_r_as_c = verify_c(c, N1, N2, H1, H2)
  end function verify_r_as_c

  ! Verify that x(:,:) has unit peak at (H1,H2)
  integer function verify_c(x, N1, N2, H1, H2)
    integer N1, N2, H1, H2
    complex(WP) :: x(:,:)

    integer k1, k2
    real(WP) err, errthr, maxerr
    complex(WP) res_exp, res_got

    ! Note, this simple error bound doesn't take into account error of
    ! input data
    errthr = 2.5 * log(real(N1*N2,WP)) / log(2.0_WP) * EPSILON(1.0_WP)
    print '("  Check if err is below errthr " G10.3)', errthr

    maxerr = 0
    do k2 = 1, N2
      do k1 = 1, N1/2+1
        if (mod(k1-1-H1,N1)==0 .and. mod(k2-1-H2,N2)==0) then
          res_exp = 1.0
        else if (mod(-k1+1-H1,N1)==0 .and. mod(-k2+1-H2,N2)==0) then
          res_exp = 1.0
        else
          res_exp = 0.0
        end if
        res_got = x(k1, k2)
        err = abs(res_got - res_exp)
        maxerr = max(err,maxerr)
        if (.not.(err < errthr)) then
          print '("  x("I0","I0"):"$)', k1,k2
          print '(" expected ("G24.17","G24.17"),"$)', res_exp
          print '(" got ("G24.17","G24.17"),"$)', res_got
          print '(" err "G10.3)', err
          print *,"  Verification FAILED"
          verify_c = 100
          return
        end if
      end do
    end do
    print '("  Verified,  maximum error was " G10.3)', maxerr
    verify_c = 0
  end function verify_c

end program config_placement
For more complete information about compiler optimizations, see our Optimization Notice.