From b410de64d0e3b98f513980b205b2622e064e9b1b Mon Sep 17 00:00:00 2001 From: huwei3 Date: Mon, 5 Dec 2022 20:06:41 +0800 Subject: [PATCH] [flang] add fortran memory align inverstigation. link #I61PTP --- ...sic-flang-memory-align-investigation.patch | 3379 +++++++++++++++++ flang.spec | 6 +- 2 files changed, 3384 insertions(+), 1 deletion(-) create mode 100644 8-add-fortran-and-gfortran-ifort-classic-flang-memory-align-investigation.patch diff --git a/8-add-fortran-and-gfortran-ifort-classic-flang-memory-align-investigation.patch b/8-add-fortran-and-gfortran-ifort-classic-flang-memory-align-investigation.patch new file mode 100644 index 0000000..8324be6 --- /dev/null +++ b/8-add-fortran-and-gfortran-ifort-classic-flang-memory-align-investigation.patch @@ -0,0 +1,3379 @@ +From 399d93f523dc6f44243acbfb68ec59b477bede28 Mon Sep 17 00:00:00 2001 +From: huwei3 +Date: Mon, 5 Dec 2022 17:38:32 +0800 +Subject: [PATCH] + 8-add-fortran-and-gfortran-ifort-classic-flang-memory-align-investigation + + +diff --git a/docs/gfortran-and-ifort-memory-align-investigation.md b/docs/gfortran-and-ifort-memory-align-investigation.md +new file mode 100644 +index 0000000..64bb226 +--- /dev/null ++++ b/docs/gfortran-and-ifort-memory-align-investigation.md +@@ -0,0 +1,2229 @@ ++## Fortran主流编译器(gfortran+ifort)内存对齐辅助能力分析报告 ++ ++ ++ ++- 版本号:1.0 ++ ++- 文档编辑:胡伟 ++ ++ - last modify:2022.11.22 ++ ++ ++ ++## 目录 ++ ++[TOC] ++ ++ ++ ++## 1.基础的内存对齐场景 ++ ++​ 本部分笔者抽取的Fortran基本内存对齐场景,是对照于C和C++的内存对齐场景而来,旨在为Fortran提供基础的内存对齐,为后续可能的优化提供基础。现描述如下: ++ ++> 场景1.基本数据类型 ++ ++- logical ++- character ++- integer ++- real ++- ==complex==[特殊] ++ ++> 场景2.复合类型 ++ ++- 第1类:同类型复合 ++ - ==array== ++- 第2类:不同类型复合 ++ - ==type== ++ ++> 场景3.函数边界 ++ ++- subroutine ++ ++- function ++ ++> 场景4.内存分配函数:allocator和deallocate的align实现 ++ ++- allocate ++ ++- deallocate ++ ++> 场景5:Fortran特有的—common块 ++ ++- common ++ ++ ++ ++ ++ ++ ++ ++## 2.gfortran内存对齐支持 ++ ++ 本部分的gfortran指的是GNU Fortran Compiler,笔者使用的版本是目前最新的gfortran10.4.0。 ++ ++### 
2.1.场景1.基础变量类型 ++ ++ ++ ++#### 例1.f90-logical ++ ++```fortran ++program main ++ implicit none ++ logical(kind=1) :: a ++ logical(kind=2) :: b ++ logical(kind=4) :: c ++ logical(kind=8) :: d ++ logical(kind=16) :: e ++ print * , kind(a) ++ print * , kind(b) ++ print * , kind(c) ++ print * , kind(d) ++ print * , kind(e) ++ ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 1 ++ 2 ++ 4 ++ 8 ++ 16 ++``` ++ ++> gfortran支持logical的kind可为1,2,4,8,16 ++ ++ ++ ++#### 例2.f90-character ++ ++```fortran ++program main ++ implicit none ++ character(kind=1) :: a ++ print * , kind(a) ++ ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 1 ++``` ++ ++> gfortran支持character的kind可为1 ++ ++ ++ ++#### 例3.f90-integer ++ ++```fortran ++program main ++ implicit none ++ integer(kind=1) :: a ++ integer(kind=2) :: b ++ integer(kind=4) :: c ++ integer(kind=8) :: d ++ integer(kind=16) :: e ++ print * , kind(a) ++ print * , kind(b) ++ print * , kind(c) ++ print * , kind(d) ++ print * , kind(e) ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 1 ++ 2 ++ 4 ++ 8 ++ 16 ++``` ++ ++> gfortran支持integer的kind可为1,2,4,8,16 ++ ++```fortran ++program main ++ integer(kind=1) :: a ++ integer(kind=2) :: b ++ integer(kind=4) :: c ++ integer(kind=8) :: d ++ ++ write (*,*) 'location a=', loc(a) ++ write (*,*) 'location b=', loc(b) ++ write (*,*) 'location c=', loc(c) ++ write (*,*) 'location d=', loc(d) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location a= 140733171523695 ++ location b= 140733171523692 ++ location c= 140733171523688 ++ location d= 140733171523680 ++``` ++ ++ ++ ++ ++ ++ ++ ++#### 例4-f90-real ++ ++```fortran ++program main ++ implicit none ++ real(kind=4) :: c ++ real(kind=8) :: d ++ real(kind=16) :: e ++ print * , kind(c) ++ print * , kind(d) ++ print * , kind(e) 
++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 4 ++ 8 ++ 16 ++``` ++ ++> gfortran支持real的kind可为4,8,16 ++ ++ ++ ++#### 例5-f90-complex ++ ++```fortran ++program main ++ implicit none ++ complex(kind=4) :: c ++ complex(kind=8) :: d ++ complex(kind=16) :: e ++ print * , kind(c) ++ print * , kind(d) ++ print * , kind(e) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 4 ++ 8 ++``` ++ ++> gfortran支持complex的kind可为4,8,16 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++### 2.2.场景2.复合类型 ++ ++#### 例1-f90-数组 ++ ++```fortran ++program main ++ implicit none ++ integer(kind=4) :: test(1000) !修改kind ++ write (*,*) 'location test=', loc(test) ++ write (*,*) 'location test[1]=', loc(test(1)) ++ write (*,*) 'location test[2]=', loc(test(2)) ++end program main ++``` ++ ++```bash ++# 这是integer(kind=2) ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location test= 140733722788464 ++ location test[1]= 140733722788464 ++ location test[2]= 140733722788466 ++ ++# 这是integer(kind=4) ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location test= 140727331795312 ++ location test[1]= 140727331795312 ++ location test[2]= 140727331795316 ++``` ++ ++ ++ ++#### 例2-f90-type ++ ++```fortran ++program main ++ implicit none ++ type demo ++ integer(kind=4) :: a(3) ++ real(kind=4) :: b ++ logical(kind=1) :: c ++ end type ++ type(demo) :: test ++ write (*,*) 'location c=', loc(test%c) ++ write (*,*) 'location b=', loc(test%b) ++ write (*,*) 'location a=', loc(test%a) ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location c= 140726372950768 ++ location b= 140726372950764 ++ location a= 140726372950752 ++``` ++ ++ ++ ++ ++ ++ ++ ++### 2.3.场景3.函数边界 ++ ++​ “函数”是自定义函数和子程序的统称。 ++ ++#### 例1-f90-subroutine ++ ++```fortran ++program main ++ call one() ++ call two() ++ write( *, *), 'subroutine-one-addr=', loc(one) 
++ write( *, *) , 'subroutine-two-addr=', loc(two) ++ ++end program main ++ ++subroutine one() ++ integer :: a=3 ++ print * , a ++end subroutine one ++ ++subroutine two() ++ print * , 'two' ++end subroutine two ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++demo.f90:4:13: ++ ++ 4 | write( *, *), 'subroutine-one-addr=', loc(one) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++demo.f90:5:13: ++ ++ 5 | write( *, *) , 'subroutine-two-addr=', loc(two) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++huwei@dell:~/exp$ ./a.out ++ 3 ++ two ++ subroutine-one-addr= 94600145211900 ++ subroutine-two-addr= 94600145211785 ++ ++``` ++ ++ ++ ++#### 例2-f90-function ++ ++```fortran ++program main ++ integer(kind=4) :: a=2 ++ integer(kind=4) :: b=3 ++ integer(kind=4) :: c ++ integer(kind=4) :: d ++ write(*,*) myadd(a,b) ++ write(*,*) mymuli(a,b) ++ write( *, *), 'function-myadd-addr=', loc(myadd) ++ write( *, *) , 'function-mymuli-addr=', loc(testadd) ++end program main ++ ++function myadd(first, second) ++ integer(kind=4):: first , second ++ integer(kind=4):: myadd ++ myadd= first + second ++ return ++end function myadd ++ ++function mymuli(first , second) ++ integer(kind=4):: first , second ++ integer(kind=4):: mymuli ++ mymuli= first*second ++ return ++end function mymuli ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++demo.f90:8:13: ++ ++ 8 | write( *, *), 'function-myadd-addr=', loc(myadd) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++demo.f90:9:13: ++ ++ 9 | write( *, *) , 'function-mymuli-addr=', loc(testadd) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++huwei@dell:~/exp$ ./a.out ++ 5 ++ 6 ++ function-myadd-addr= 94102100795820 ++ function-mymuli-addr= 140728935620748 ++ ++``` ++ ++ ++ ++ ++ ++### 2.4.场景4.内存分配函数 ++ ++#### 例1-f90 ++ ++```fortran ++program main ++ implicit none ++ real, dimension (:,:), allocatable :: darray ++ integer :: s1, s2 ++ integer :: 
i, j ++ print*, "Enter the size of the array:" ++ s1=2 ++ s2=2 ++ ! allocate memory ++ allocate ( darray(s1,s2) ) ++ ++ do i = 1, s1 ++ do j = 1, s2 ++ darray(i,j) = i*j ++ print*, "darray(",i,",",j,") = ", darray(i,j) ++ end do ++ end do ++ ++ write(*,*) 'addr darray(1,1)' , loc( darray(1,1) ) ++ write(*,*) 'addr darray(2,1)' , loc( darray(2,1) ) ++ write(*,*) 'addr darray(1,2)' , loc( darray(1,2) ) ++ write(*,*) 'addr darray(2,2)' , loc( darray(2,2) ) ++ deallocate (darray) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.00000000 ++ darray( 1 , 2 ) = 2.00000000 ++ darray( 2 , 1 ) = 2.00000000 ++ darray( 2 , 2 ) = 4.00000000 ++ addr darray(1,1) 94578464733456 ++ addr darray(2,1) 94578464733460 ++ addr darray(1,2) 94578464733464 ++ addr darray(2,2) 94578464733468 ++``` ++ ++ ++ ++ ++ ++### 2.5.场景5.common ++ ++#### 例1-f90 ++ ++- 如下,源码改造自彭国伦的《Fortran95程序设计》^1^ ++ ++```fortran ++program ex0810 ++ implicit none ++ integer :: a, b ++ common a, b !定义a,b是全局变量中的第1及第2个变量 ++ a=1 ++ b=2 ++ call ShowCommon() ++ write(*,*) , 'common begin a addr=', loc(a) ++ write(*,*) , 'common begin b addr=', loc(b) ++ ++ stop ++ ++end program ex0810 ++ ++subroutine ShowCommon() ++ implicit none ++ integer :: num1, num2 ++ common num1, num2 !定义num1,num2是全局变量中的第1及第2个变量 ++ write(*,*) num1, num2 ++ return ++ ++end subroutine ShowCommon ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++demo.f90:8:13: ++ ++ 8 | write(*,*) , 'common begin a addr=', loc(a) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++demo.f90:9:13: ++ ++ 9 | write(*,*) , 'common begin b addr=', loc(b) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++huwei@dell:~/exp$ ./a.out ++ 1 2 ++ common begin a addr= 94585262964768 ++ common begin b addr= 94585262964772 ++ ++``` ++ ++​ 
关于common块中基本数据类型对齐和stack中是不同的,除了gfortran有此类区别外,比如oracle公司的Fortran编译器在不同架构^11^下的common和stack中的数据类型对齐也存在不同,其中典型不同的,笔者摘抄了一部分有如下表格: ++ ++| **Fortran 77 Data Type** | **Size** | Default Alignment | Default Alignment | Alignment in COMMON | Alignment in COMMON | ++| ------------------------ | -------- | ----------------- | ----------------- | ------------------- | ------------------- | ++| | | SPARC | x86 | SPARC | x86 | ++| INTEGER X | 4 | 4 | 4 | 4 | 4 | ++| INTEGER(kind=2)X | 2 | 2 | 2 | 2 | 2 | ++| INTEGER(kind=4)X | 4 | 4 | 4 | 4 | 4 | ++| INTEGER(kind=8)X | 8 | 8 | 4 | 4 | 4 | ++| | | | | | | ++| LOGICAL X | 4 | 4 | 4 | 4 | 4 | ++| LOGICAL(kind=1) X | 1 | 1 | 1 | 1 | 1 | ++| LOGICAL(kind=2) X | 2 | 2 | 2 | 2 | 2 | ++| LOGICAL(kind=4) X | 4 | 4 | 4 | 4 | 4 | ++| LOGICAL(kind=8) X | 8 | 8 | 4 | 4 | 4 | ++ ++ ++ ++ ++ ++### 2.6.扩展-选项 ++ ++#### 2.6.1.设置common块 ++ ++- `-Wno-align-commons` ++ ++- `-Walign-commons` ++ ++​ 默认情况下,gfortran会对任何情况下的变量进行填充以在**COMMON块内**进行正确对齐发出警告。这个警告可以通过' `-Wno-align-commons` '关闭。参见' `-falign-commons` '。 ++ ++- `-falign-commons` ++- `-fno-align-commons` ++ ++**默认情况下**,gfortran通过根据需要**填充COMMON块中的所有变量**,**强制它们正确对齐。** ++在某些平台上这是强制性的,在其他平台上它可以提高性能。 ++ ++如果一个COMMON块没有在所有地方声明一致的数据类型,这种填充会导致麻烦,' `-fno-align-commons` '可以用来禁用自动对齐。该选项的相同形式应用于共享一个COMMON块的所有文件。 ++ ++默认情况下, `gfortran` 通过根据需要填充所有变量来==强制正确对齐 `COMMON` 块中的所有变量==。**在某些平台上,这是必需的,而在其他平台上,这可以提高性**能。如果未在所有地方都未声明具有一致数据类型的 `COMMON` 块,则这种填充会引起麻烦,并且-fno-align-commons可用于禁用自动对齐。对于共享一个 `COMMON` 块的所有文件,都应使用此选项的相同形式。为避免 `COMMON` 块中可能存在的对齐问题,建议将对象从大到小排序。 ++ ++**为了避免COMMON块中潜在的对齐问题,建议将对象从最大到最小排序。** ++ ++```fortran ++program main ++ implicit none ++ character(kind=1) ::a ++ integer(kind=8) :: b ++ common a, b !定义a,b是全局变量中的第1及第2个变量 ++ a='a' ++ b=2 ++ write(*,*) , 'common begin a addr=', loc(a) ++ write(*,*) , 'common begin b addr=', loc(b) ++ ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++demo.f90:8:13: ++ ++ 8 | write(*,*) , 'common begin a addr=', loc(a) ++ | 1 ++Warning: Legacy 
Extension: Comma before i/o item list at (1) ++demo.f90:9:13: ++ ++ 9 | write(*,*) , 'common begin b addr=', loc(b) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++demo.f90:5:9: ++ ++ 5 | common a, b !定义a,b是全局变量中的第1及第2个变量 ++ | 1 ++Warning: Padding of 7 bytes required before ‘b’ in COMMON at (1); reorder elements or use ‘-fno-align-commons’ [-Walign-commons] ++huwei@dell:~/exp$ ./a.out ++ common begin a addr= 94220087132192 ++ common begin b addr= 94220087132200 ++huwei@dell:~/exp$ gfortran demo.f90 -fno-align-commons ++demo.f90:8:13: ++ ++ 8 | write(*,*) , 'common begin a addr=', loc(a) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++demo.f90:9:13: ++ ++ 9 | write(*,*) , 'common begin b addr=', loc(b) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++huwei@dell:~/exp$ ./a.out ++ common begin a addr= 94317864271904 ++ common begin b addr= 94317864271905 ++huwei@dell:~/exp$ gfortran demo.f90 -falign-commons ++demo.f90:8:13: ++ ++ 8 | write(*,*) , 'common begin a addr=', loc(a) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++demo.f90:9:13: ++ ++ 9 | write(*,*) , 'common begin b addr=', loc(b) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++demo.f90:5:9: ++ ++ 5 | common a, b !定义a,b是全局变量中的第1及第2个变量 ++ | 1 ++Warning: Padding of 7 bytes required before ‘b’ in COMMON at (1); reorder elements or use ‘-fno-align-commons’ [-Walign-commons] ++huwei@dell:~/exp$ ./a.out ++ common begin a addr= 94528684232736 ++ common begin b addr= 94528684232744 ++huwei@dell:~/exp$ gfortran demo.f90 -Wno-align-commons ++demo.f90:8:13: ++ ++ 8 | write(*,*) , 'common begin a addr=', loc(a) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++demo.f90:9:13: ++ ++ 9 | write(*,*) , 'common begin b addr=', loc(b) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++huwei@dell:~/exp$ ./a.out ++ common begin a addr= 94479122538528 ++ common begin b 
addr= 94479122538536 ++huwei@dell:~/exp$ gfortran demo.f90 -Walign-commons ++demo.f90:8:13: ++ ++ 8 | write(*,*) , 'common begin a addr=', loc(a) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++demo.f90:9:13: ++ ++ 9 | write(*,*) , 'common begin b addr=', loc(b) ++ | 1 ++Warning: Legacy Extension: Comma before i/o item list at (1) ++demo.f90:5:9: ++ ++ 5 | common a, b !定义a,b是全局变量中的第1及第2个变量 ++ | 1 ++Warning: Padding of 7 bytes required before ‘b’ in COMMON at (1); reorder elements or use ‘-fno-align-commons’ [-Walign-commons] ++huwei@dell:~/exp$ ./a.out ++ common begin a addr= 94561412796448 ++ common begin b addr= 94561412796456 ++ ++``` ++ ++ ++ ++#### 2.6.2.结构类型的padding使用 ++ ++​ 结构可以包含一个名为`%FILL`的特殊字段。这将创建一个匿名组件,它不能被访问,但会占用空间,就像在其位置声明了一个相同类型的组件一样,这对于对齐很有用。例如,下面的结构至少包含16个字节:^13^ ++ ++```fortran ++structure /padded/ ++ character(4) start ++ character(8) %FILL ++ character(4) end ++end structure ++``` ++ ++ ++ ++ ++ ++ ++ ++## 3.ifort内存对齐支持 ++ ++​ 时至今日,从2021.1版本开始,目前Intel®在Intel oneAPI HPC Toolkit 提供两个 Fortran 编译器,两者区别如下: ++ ++- Intel®Fortran Compiler **Classic**(简称==ifort==),是英特尔**传统的Fortran编译器**,时至今日,已经有很长的历史,他是一个完全支持 Fortran 2018 的成熟编译器; ++ ++- **Intel® Fortran Compiler**(简称 ==ifx==),他是由英特尔开发的一款基于Intel Fortran Compiler Classic (ifort)前端和运行时库,使用LLVM后端技术的新的测试版本编译器。支持Linux、MacOS、Windows等系统,它目前支持 Fortran 95 和部分更新版本的标准。^6^ ++ ++​ 本部分笔者使用的是**ifort(Intel® Fortran Compiler Classic)2021.5.0**,使用的硬件系统是**64位**的**X64**平台,操作系统是Ubuntu的**Linux**发行版。 ++ ++```bash ++whoway@VMwhoway:~/ifortTemp$ ifort -v ++ifort version 2021.5.0 ++whoway@VMwhoway:~/ifortTemp$ if ++if ifconfig ifort ifx ++whoway@VMwhoway:~/ifortTemp$ ifx -v ++ifx version 2022.0.0 ++``` ++ ++ ++ ++ ++ ++### 3.1.场景1.基础变量类型 ++ ++ ++ ++#### 例1.f90-logical ++ ++```fortran ++program main ++ implicit none ++ logical(kind=1) :: a ++ logical(kind=2) :: b ++ logical(kind=4) :: c ++ logical(kind=8) :: d ++ print * , kind(a) ++ print * , kind(b) ++ print * , kind(c) ++ print * , kind(d) ++ ++end program 
main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 1 ++ 2 ++ 4 ++ 8 ++``` ++ ++> ifort支持logical的kind可为1,2,4,8(不支持16) ++ ++ ++ ++#### 例2.f90-character ++ ++```fortran ++program main ++ implicit none ++ character(kind=1) :: a ++ print * , kind(a) ++ ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 1 ++``` ++ ++> ifort支持character的kind可为1 ++ ++ ++ ++ ++ ++#### 例3.f90-integer ++ ++```fortran ++program main ++ implicit none ++ integer(kind=1) :: a ++ integer(kind=2) :: b ++ integer(kind=4) :: c ++ integer(kind=8) :: d ++ print * , kind(a) ++ print * , kind(b) ++ print * , kind(c) ++ print * , kind(d) ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 1 ++ 2 ++ 4 ++ 8 ++ 16 ++``` ++ ++> ifort支持integer的kind可为1,2,4,8(不支持16) ++ ++```fortran ++program main ++ integer(kind=1) :: a ++ integer(kind=2) :: b ++ integer(kind=4) :: c ++ integer(kind=8) :: d ++ ++ write (*,*) 'location a=', loc(a) ++ write (*,*) 'location b=', loc(b) ++ write (*,*) 'location c=', loc(c) ++ write (*,*) 'location d=', loc(d) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location a= 140720984871726 ++ location b= 140720984871724 ++ location c= 140720984871720 ++ location d= 140720984871704 ++``` ++ ++ ++ ++ ++ ++ ++ ++#### 例4-f90-real ++ ++```fortran ++program main ++ implicit none ++ real(kind=4) :: c ++ real(kind=8) :: d ++ real(kind=16) :: e ++ print * , kind(c) ++ print * , kind(d) ++ print * , kind(e) ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 4 ++ 8 ++ 16 ++``` ++ ++> ifort支持real的kind可为4,8,16 ++ ++ ++ ++#### 例5-f90-complex ++ ++```fortran ++program main ++ implicit none ++ complex(kind=4) :: c ++ complex(kind=8) :: d ++ complex(kind=16) :: e ++ print * , kind(c) ++ print * , kind(d) ++ print * , kind(e) ++end program main ++ ++``` 
++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 4 ++ 8 ++ 16 ++``` ++ ++> ifort支持complex的kind可为4,8,16 ++ ++ ++ ++ ++ ++ ++ ++### 3.2.场景2.复合类型 ++ ++#### 例1-f90-数组 ++ ++```fortran ++program main ++ implicit none ++ integer(kind=4) :: test(1000) !修改 ++ write (*,*) 'location test=', loc(test) ++ write (*,*) 'location test[1]=', loc(test(1)) ++ write (*,*) 'location test[2]=', loc(test(2)) ++end program main ++``` ++ ++```bash ++# 这是integer(kind=2) ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location test= 5001728 ++ location test[1]= 5001728 ++ location test[2]= 5001730 ++ ++# 这是integer(kind=4) ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location test= 5001728 ++ location test[1]= 5001728 ++ location test[2]= 5001732 ++``` ++ ++ ++ ++#### 例2-f90-type ++ ++```fortran ++program main ++ implicit none ++ type demo ++ integer(kind=4) :: a(3) ++ real(kind=4) :: b ++ logical(kind=1) :: c ++ end type ++ type(demo) :: test ++ write (*,*) 'location c=', loc(test%c) ++ write (*,*) 'location b=', loc(test%b) ++ write (*,*) 'location a=', loc(test%a) ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location c= 5001728 ++ location b= 5001724 ++ location a= 5001712 ++ ++``` ++ ++ ++ ++ ++ ++### 3.3.场景3.函数边界 ++ ++​ “函数”是自定义函数和子程序的统称。 ++ ++#### 例1-f90-subroutine ++ ++```fortran ++program main ++ call one() ++ call two() ++ write( *, *), 'subroutine-one-addr=', loc(one) ++ write( *, *) , 'subroutine-two-addr=', loc(two) ++ ++end program main ++ ++subroutine one() ++ integer :: a=3 ++ print * , a ++end subroutine one ++ ++subroutine two() ++ print * , 'two' ++end subroutine two ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 3 ++ two ++ subroutine-one-addr= 4209248 ++ subroutine-two-addr= 4209168 ++``` ++ ++ ++ ++#### 例2-f90-function ++ ++```fortran ++program main ++ integer(kind=4) :: a=2 ++ integer(kind=4) :: b=3 ++ 
 integer(kind=4) :: c ++ integer(kind=4) :: d ++ write(*,*) myadd(a,b) ++ write(*,*) mymuli(a,b) ++ write( *, *), 'function-myadd-addr=', loc(myadd) ++ write( *, *) , 'function-mymuli-addr=', loc(testadd) ++end program main ++ ++function myadd(first, second) ++ integer(kind=4):: first , second ++ integer(kind=4):: myadd ++ myadd= first + second ++ return ++end function myadd ++ ++function mymuli(first , second) ++ integer(kind=4):: first , second ++ integer(kind=4):: mymuli ++ mymuli= first*second ++ return ++end function mymuli ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 5 ++ 6 ++ function-myadd-addr= 4209120 ++ function-mymuli-addr= 140731498931584 ++ ++``` ++ ++ ++ ++ ++ ++### 3.4.场景4.内存分配函数 ++ ++ ++ ++#### 例1-f90 ++ ++参考了来源,==可分配数组==和指针的对齐^7^ ++ ++```fortran ++mod.f90: ++ ++module tmod ++ implicit none ++ real, allocatable, dimension(:) :: A, B, C ++ !dir$ attributes align:32 :: A, B, C ++ !dir$ assume_aligned A:32, B:32, C:32 ++end module tmod ++ ++``` ++ ++```fortran ++t.f90: ++ ++subroutine test ++ use tmod ++ implicit none ++ integer :: i ++ do i = 1, 1024 ++ A(i) = 2.*B(i) + C(i) ++ enddo ++end ++``` ++ ++```bash ++ifort -c -xavx -qopt-report=4 -qopt-report-file=stderr -qopt-report-phase=vec t.f90 ++… ++ remark #15388: vectorization support: reference tmod_mp_a_ has aligned access [ t.f90(6,5) ] ++ remark #15389: vectorization support: reference tmod_mp_b_ has unaligned access [ t.f90(6,5) ] ++ remark #15389: vectorization support: reference tmod_mp_c_ has unaligned access [ t.f90(6,5) ] ++``` ++ ++ ++ ++#### 例2-f90 ++ ++```fortran ++program main ++ implicit none ++ real, dimension (:,:), allocatable :: darray ++ integer :: s1, s2 ++ integer :: i, j ++ print*, "Enter the size of the array:" ++ s1=2 ++ s2=2 ++ ! 
 allocate memory ++ allocate ( darray(s1,s2) ) ++ ++ do i = 1, s1 ++ do j = 1, s2 ++ darray(i,j) = i*j ++ print*, "darray(",i,",",j,") = ", darray(i,j) ++ end do ++ end do ++ ++ write(*,*) 'addr darray(1,1)' , loc( darray(1,1) ) ++ write(*,*) 'addr darray(2,1)' , loc( darray(2,1) ) ++ write(*,*) 'addr darray(1,2)' , loc( darray(1,2) ) ++ write(*,*) 'addr darray(2,2)' , loc( darray(2,2) ) ++ deallocate (darray) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 ) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 6579296 ++ addr darray(2,1) 6579300 ++ addr darray(1,2) 6579304 ++ addr darray(2,2) 6579308 ++``` ++ ++ ++ ++ ++ ++### 3.5.场景5.common ++ ++#### 例1-f90 ++ ++- 如下,源码改造自彭国伦的《Fortran95程序设计》^1^ ++ ++```fortran ++program ex0810 ++ implicit none ++ integer :: a, b ++ common a, b !定义a,b是全局变量中的第1及第2个变量 ++ a=1 ++ b=2 ++ call ShowCommon() ++ write(*,*) , 'common begin a addr=', loc(a) ++ write(*,*) , 'common begin b addr=', loc(b) ++ ++ stop ++ ++end program ex0810 ++ ++subroutine ShowCommon() ++ implicit none ++ integer :: num1, num2 ++ common num1, num2 !定义num1,num2是全局变量中的第1及第2个变量 ++ write(*,*) num1, num2 ++ return ++ ++end subroutine ShowCommon ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 1 2 ++ common begin a addr= 5356480 ++ common begin b addr= 5356484 ++ ++``` ++ ++ ++ ++ ++ ++### 3.6.扩展-引导语 ++ ++ ++ ++#### 3.6.1.`ASSUME` ++ ++​ 通用的编译器引导语:向编译器优化器提供启发式信息 ++ ++```fortran ++!DIR$ ASSUME (scalar-Boolean-expression) ++scalar-Boolean-expression !在运行时计算结果为.TRUE.或.FALSE.的任何表达式。 ++``` ++ ++**标量布尔表达式**(scalar-Boolean-expression)假定为真,优化器(optimizer)可以使用它来生成更好的代码。 ++ ++如果指定了检查假设选项(check assume option),并且标量布尔表达式在运行时的计算结果不为`.TRUE.`,将显示错误消息并中止执行。 ++ ++##### 例1-f90 ++ ++​ 在下面的示例中,编译器被告知A使用`ASSUME_ALIGNED`指令在32字节边界上对齐。 ++ ++​ 
 ASSUME指令表示A的第一维度的长度是8的倍数。因此,优化器知道`A(I,J+1)`和`A(I,J-1)`与`A(I,J)`的地址间隔是64字节的整数倍,因此也在32字节边界上对齐。此信息有助于优化器为这些循环生成有效的向量化代码。^3^ ++ ++```fortran ++SUBROUTINE F (A, NX,NY,I1,I2,J1,J2) ++REAL (8) :: A (NX,NY) ++!DIR$ ASSUME_ALIGNED A:32 ++!DIR$ ASSUME (MOD(NX,8) .EQ. 0) ++! ensure that the first array access in the loop is aligned ++!DIR$ ASSUME (MOD(I1,8) .EQ. 1) ++DO J=J1,J2 ++ DO I=I1,I2 ++ A(I,J) = A(I,J) + A(I,J+1) + A(I,J-1) ++ ENDDO ++ENDDO ++END SUBROUTINE F ++``` ++ ++ ++ ++ ++ ++#### 3.6.2.`ASSUME_ALIGNED` ++ ++​ 通用的编译器引导语:指定对齐内存中的实体。 ++ ++```fortran ++!DIR$ ASSUME_ALIGNED address1:n1 [, address2:n2]... ++``` ++ ++***address*** ++ ++​ 一个数组变量。它可以是任何数据type、 kind或rank >0的数据。它可以是派生类型变量的数组组件,也可以是关联的记录字段引用、主机或使用,或者具有ALLOCATABLE或POINTER属性。 ++它**不能**是以下任何一项: ++ ++- COMMON中的实体(或等同于COMMON中某事物的实体) ++ ++- 派生类型(derived type )变量或记录字段引用的组件 ++ ++- 通过使用或主机关联访问的实体(An entity accessed by use or host association) ++ ++ 如果它是一个module变量,则该地址将被忽略。 ++ ++***n*** ++ ++​ 正整数常量表达式。它的值必须是介于1和256之间的2的幂,它以字节为单位指定地址的内存对齐方式。 ++ ++`ASSUME_ALIGNED`引导语必须出现在**规范语句部分**之后或**可执行语句部分**内部。 ++ ++如果指定了多个`address:n`项,则必须用逗号分隔。 ++ ++如果地址是`Cray POINTER`或具有`POINTER`属性,则被假定对齐的是该`POINTER`本身,而不是它所指向的对象(pointee)或`TARGET`。 ++ ++如果指定了检查假设选项,并且在运行时地址未在n字节边界上对齐,则会显示错误消息并中止执行。 ++ ++有关更多信息,请参阅`ASSUME`指令描述中的示例。以下示例显示了`ASSUME_ALIGNED`指令的正确位置和用法:^3^ ++ ++##### 例1-f90 ++ ++```fortran ++ TYPE NODE ++ REAL(KIND=8), POINTER :: A(:,:) ++ END TYPE NODE ++ ++ TYPE(NODE), POINTER :: NODES ++ ALLOCATE(NODES) ++ ALLOCATE(NODES%A(1000,1000)) ++!DIR$ ASSUME_ALIGNED NODES%A(1,1) : 16 ++ DO I=1,N ++ NODES%A(1,I) = NODES%A(1,I)+1 ++ ENDDO ++… ++END ++``` ++ ++在**type定义**中放置`ASSUME_ALIGNED`是非法的;例如: ++ ++```fortran ++TYPE S ++!DIR$ ASSUME_ALIGNED T : 16 ! 
 this is an error ++ REAL(8), ALLOCATABLE :: T(:) ++END TYPE S ++``` ++ ++ ++ ++ ++ ++#### 3.6.3.`ATTRIBUTES ALIGN` ++ ++​ `ATTRIBUTES`引导语选项`ALIGN`指定**变量**以及**派生类型**(derived types)的可分配(allocatable )或指针(pointer)组件的字节对齐方式。 ++ ++```fortran ++!DIR$ ATTRIBUTES ALIGN: n:: object ++``` ++ ++***n*** ++ ++​ 最小对齐边界的字节数。 ++ ++​ 对于可分配对象(allocatable ),边界值必须是2的幂,例如1、2、4、8、16、32、64、128等。在Linux和macOS系统上,n的值必须介于1和2097152(2^21^)之间,在Windows系统上必须介于1到8192(2^13^)之间。 ++​ 对于不可分配(non-allocatable )对象,边界值必须是2的幂,在Windows系统上介于1和64之间,在Linux系统上介于1和65536(2^16^)之间,在macOS系统上介于1和4096(2^12^)之间。 ++ ++***object*** ++ ++​ 要对齐的派生类型的变量或可分配或指针组件。 ++ ++​ 可以通过此指令对齐的对象包括**静态局部变量**、**自动变量**、**模块变量**、**动态分配的数组**、**派生类型的可分配数组组件**以及**common块的开头**。此指令**不能**用于对齐**common块中的变量**。 ++ ++​ 如果在具有`ALLOCATABLE`或`POINTER`属性的对象上指定引导语` !DIR$ ATTRIBUTES ALIGN`,`ALLOCATE`语句将在分配内存时尝试使用该对齐方式。 ++ ++​ 对于派生类型的可分配或指针组件,该指令必须出现在派生类型`type…END type`块中。 ++ ++​ 如果TYPE是扩展类型,则指令不能引用父类型中的组件。 ++ ++```fortran ++TYPE EXAMPLE ++!DIR$ ATTRIBUTES ALIGN : 64 :: R_alloc ++REAL, ALLOCATABLE :: R_alloc ( : ) ++REAL :: R_scalar ++INTEGER :: I_nonalloc(25) ++END TYPE EXAMPLE ++TYPE (EXAMPLE) :: MyVar ++ALLOCATE (MyVar%R_alloc(1000)) ! 
 Memory is allocated aligned at a 64-byte boundary ++``` ++ ++请注意,将`ALIGN:64`属性赋予组件`R_alloc`是有效的,但不能赋予组件`R_scalar`或组件`I_nonalloc`。 ++ ++以下示例显示公共块的名称可以可选地用斜线括起来: ++ ++```fortran ++!DIR$ ATTRIBUTES ALIGN: n :: /common_name/ ++``` ++ ++ ++ ++ ++ ++#### 3.6.4.`ATTRIBUTES CODE_ALIGN` ++ ++​ `ATTRIBUTES`引导语选项`CODE_ALIGN`指定**==过程(procedure)==**的字节对齐方式。 ++ ++```fortran ++!DIR$ ATTRIBUTES CODE_ALIGN: n:: procedure-name ++``` ++ ++***n*** ++ ++​ 最小对齐边界的字节数。它必须是介于1和4096之间的2的幂,例如1、2、4、8、16、32、64、128等。 ++如果为n指定1,则不执行对齐。如果未指定n,则默认对齐方式为16字节。 ++ ++***procedure-name*** ++ ++​ 过程的名称。该指令可以受编译器选项`align-loops` 、CODE_ALIGN**引导语**和`CODE_ALIGN`**属性(ATTRIBUTE)**的影响。如果使用`-falign-loops=m` (Linux和macOS)或`/Qalign-loops:m` (Windows)选项编译代码,并且过程具有`CODE_ALIGN:k`属性,则过程按`MAX (m, k)`字节边界对齐。 ++ ++​ 如果过程具有`CODE_ALIGN:k`属性,且`CODE_ALIGN:n`引导语位于循环前面,则过程和循环都在`MAX (k, n)`字节边界上对齐。 ++ ++​ 考虑(ifort的测试文件)`test_align.f90`中的以下代码片段: ++ ++```fortran ++FUNCTION F () ++!DIR$ ATTRIBUTES CODE_ALIGN:32 :: F ++… ++!DIR$ CODE_ALIGN:16 ++DO J = 1, N ++… ++END DO ++… ++END FUNCTION F ++``` ++ ++​ 借助前述的一些约束,如果在编译`test_align.f90`的时候设置选项:`-falign-loops=64`(Linux和macOS)或`/Qalign-loops:64`(Windows),那么将在64字节边界上对齐函数`F`和`DO J`循环。 ++ ++ ++ ++ ++ ++#### 3.6.5.`CODE_ALIGN` ++ ++​ 通用的编译器引导语:指定==循环==的字节对齐方式 ++ ++```fortran ++!DIR$ CODE_ALIGN [:n] ++``` ++ ++***n*** ++ ++​ (可选)表示最小对齐边界的字节数的正整数常量表达式。其值必须是介于1和4096之间的2的幂,例如1、2、4、8、16、32、64、128等。如果为n指定1,则不执行对齐。如果未指定n,则默认对齐方式为16字节。此指令必须位于要对齐的**循环**或**代码块**之前。 ++​ 如果使用`-falign-loops=m`(Linux和macOS)或`/Qalign-loops:m`(Windows)选项编译代码,并且在循环之前有“`CODE_ALIGN:n`”指令,则循环将在MAX(m,n)字节边界上对齐。 ++​ 如果一个过程具有“`CODE_ALIGN:k`”属性,并且循环之前有“`CODE_ALIGN:n`”指令,则该过程和循环都在`MAX(k,n)`字节边界上对齐。 ++ ++​ 考虑(ifort的测试文件)文件`test_code_align.f90`中的以下代码片段: ++ ++```fortran ++!DIR$ CODE_ALIGN ++DO J = 1, N ++… ++END DO ++``` ++ ++编译`test_code_align.f90`时,`DO J`循环的起始代码将在(默认的)16字节边界上对齐。 ++如果未指定`CODE_ALIGN`指令,则循环的对齐取决于实现,并且可能在不同的编译之间发生变化。 ++ ++ ++ ++ ++ ++#### 3.6.6.`ALIGNED Clause` ++ ++​ 并行指令子句(Parallel Directive Clause):指定==列表==中的==所有变量==都是对齐的。 
 ++ ++```fortran ++ALIGNED (list [:n]) ++``` ++ ++***list*** ++ ++​ 一个或多个变量的名称。每个名称必须用逗号分隔。列表中出现的任何变量都不能出现在多个`ALIGNED`子句中。 ++ ++***n*** ++ ++​ 必须是常量正整数表达式;它指示对齐的字节数。如果未指定n,编译器将使用为目标平台上的**SIMD指令**指定的**默认对齐**方式。 ++ ++​ ALIGNED子句声明每个列表项的位置按ALIGNED子句的可选对齐参数n所表示的字节数对齐。如果列表项具有`ALLOCATABLE`属性,则其分配状态必须为已分配。如果它具有`POINTER`属性,则其关联状态必须为已关联。如果列表项的类型是`TYPE(C_PTR)`或`Cray指针`,则必须定义该项。 ++ ++> 注意: ++ ++​ 使用ALIGNED子句时要小心。如果某些访问模式实际上未对齐,则指示编译器使用对齐的数据移动指令实现所有数组引用将导致运行时异常。 ++ ++ ++ ++ ++ ++ ++ ++### 3.7.扩展-选项 ++ ++#### 3.7.1.选项`align` ++ ++​ 作用:告诉编译器如何对齐某些数据项。 ++ ++```bash ++Linux OS: ++-align [keyword[, keyword...]] ++-noalign ++``` ++ ++参数***keyword***指定要对齐的数据项。可能的值是: ++ ++| 选项 | 注释 | ++| ---------------- | ------------------------------------------------------------ | ++| none | 防止在公共块(common blocks)和结构(structures)的任何地方填充字节。 | ++| array***n***byte | 指定数组的起始边界。 | ++| [no]commons | 影响common 块实体的对齐。 | ++| [no]dcommons | 影响common 块实体的对齐。 | ++| [no]qcommons | 影响common 块实体的对齐。 | ++| [no]zcommons | 影响common 块实体的对齐。 | ++| [no]records | 影响派生类型(derived-type)组件和记录结构字段的对齐。 | ++| rec***n***byte | 指定记录结构的派生类型组件和字段的大小边界。 | ++| [no]sequence | 影响带SEQUENCE语句的(sequenced)派生类型组件的对齐。 | ++| all | 尽可能向common 块和结构中的数据项添加填充字节。 | ++ ++默认值是 ++ ++- nocommons 不为公共块的对齐添加填充字节 ++- nodcommons 不为公共块的对齐添加填充字节 ++- noqcommons 不为公共块的对齐添加填充字节 ++- nozcommons 不为公共块的对齐添加填充字节 ++- records 在默认的自然边界上对齐派生类型组件和记录结构字段( record structure fields) ++- nosequence 导致用SEQUENCE语句声明的派生类型组件被打包,而不管用户设置的当前对齐规则如何 ++- 默认情况下,common块不添加填充,但结构会添加填充 ++ ++ ++ ++##### 例1.`none` ++ ++​ 告诉编译器不要在common块或结构的任何地方添加填充字节。这与指定`noalign`相同。 ++ ++```fortran ++program main ++ type demo ++ integer(kind=8) :: a(3) ++ logical(kind=1) :: c ++ real(kind=8) :: b ++ end type ++ type(demo) :: test ++ write (*,*) 'location c=', loc(test%c) ++ write (*,*) 'location b=', loc(test%b) ++ write (*,*) 'location a=', loc(test%a) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location c= 5001736 ++ location b= 5001744 ++ location a= 5001712 
++huwei@dell:~/exp$ ifort demo.f90 -align none ++demo.f90(2): warning #6379: The structure contains one or more misaligned fields. [DEMO] ++ type demo ++---------^ ++huwei@dell:~/exp$ ./a.out ++ location c= 5001736 ++ location b= 5001737 ++ location a= 5001712 ++ ++``` ++ ++ ++ ++##### 例2.`array【n】byte` ++ ++​ 将数组的开始对齐到n字节边界上。N取值为8、16、32、64、128、256。n的默认值是8。这将影响除COMMON中的数组外的所有数组的开始对齐。数组的元素之间没有填充。 ++ ++```fortran ++program main ++ integer(4) :: test(1000) ++ write (*,*) 'location test=', loc(test) ++ write (*,*) 'location test[1]=', loc(test(1)) ++ write (*,*) 'location test[2]=', loc(test(2)) ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ ./a.out ++ location test= 5001728 ++ location test[1]= 5001728 ++ location test[2]= 5001732 ++huwei@dell:~/exp$ ifort demo.f90 -align array16byte ++huwei@dell:~/exp$ ./a.out ++ location test= 5001728 ++ location test[1]= 5001728 ++ location test[2]= 5001732 ++huwei@dell:~/exp$ ifort demo.f90 -align array32byte ++huwei@dell:~/exp$ ./a.out ++ location test= 5001728 ++ location test[1]= 5001728 ++ location test[2]= 5001732 ++huwei@dell:~/exp$ ifort demo.f90 -align array128byte ++huwei@dell:~/exp$ ./a.out ++ location test= 5001856 ++ location test[1]= 5001856 ++ location test[2]= 5001860 ++ ++``` ++ ++ ++ ++##### 例3-和commons相关的测试 ++ ++> `[no]commons` ++ ++​ 通过根据需要添加填充字节,将自然边界上的所有common块实体对齐到**最多4个字节。** ++ ++​ `align nocommons`选项不为common块添加填充。在这种情况下,除非COMMON语句中指定的数据项的顺序将最大的数字数据项放在最前面,然后是第二个最大的数字数据(以此类推),然后是任何字符数据,否则就会出现未对齐的数据。 ++ ++> `[no]dcommons` ++ ++​ 通过根据需要添加填充字节,在自然边界上对齐所有公共块实体,**最多8个字节。** ++align nodcommons选项不向公共块添加填充。 ++ ++```fortran ++program main ++ implicit none ++ character(kind=1) ::a ++ integer(kind=8) :: b ++ common a, b !定义a,b是全局变量中的第1及第2个变量 ++ a='a' ++ b=2 ++ write(*,*) , 'common begin a addr=', loc(a) ++ write(*,*) , 'common begin b addr=', loc(b) ++ ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++demo.f90(4): remark #6375: Because of COMMON, the alignment of object is inconsistent 
with its type - potential performance impact. [B] ++ integer(kind=8) :: b ++-----------------------------^ ++huwei@dell:~/exp$ ./a.out ++ common begin a addr= 5356480 ++ common begin b addr= 5356481 ++huwei@dell:~/exp$ ifort demo.f90 -align commons ++demo.f90(4): remark #6375: Because of COMMON, the alignment of object is inconsistent with its type - potential performance impact. [B] ++ integer(kind=8) :: b ++-----------------------------^ ++huwei@dell:~/exp$ ./a.out ++ common begin a addr= 5356480 ++ common begin b addr= 5356484 ++huwei@dell:~/exp$ ifort demo.f90 -align dcommons ++huwei@dell:~/exp$ ./a.out ++ common begin a addr= 5356480 ++ common begin b addr= 5356488 ++ ++``` ++ ++> `[no]qcommons` ++ ++通过根据需要添加填充字节,在自然边界上对齐所有公共块实体,**最多为16个字节**。 ++align noqcommons选项不为公共块添加填充 ++ ++> `[no]zcommons` ++ ++通过根据需要添加填充字节,在自然边界上对齐所有公共块实体,**最多32个字节**。 ++align nozcommons选项不为公共块添加填充。 ++ ++```fortran ++program main ++ implicit none ++ character(kind=1) ::a ++ integer(kind=8) :: b(4) ++ common a, b !定义a,b是全局变量中的第1及第2个变量 ++ a='a' ++ b=2 ++ write(*,*) , 'common begin a addr=', loc(a) ++ write(*,*) , 'common begin b addr=', loc(b) ++end program main ++``` ++ ++ ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++demo.f90(4): remark #6375: Because of COMMON, the alignment of object is inconsistent with its type - potential performance impact. 
[B] ++ integer(kind=8) :: b(4) ++-----------------------------^ ++huwei@dell:~/exp$ ./a.out ++ common begin a addr= 5356480 ++ common begin b addr= 5356481 ++huwei@dell:~/exp$ ifort demo.f90 -align qcommons ++huwei@dell:~/exp$ ./a.out ++ common begin a addr= 5356480 ++ common begin b addr= 5356488 ++huwei@dell:~/exp$ ifort demo.f90 -align zcommons ++huwei@dell:~/exp$ ./a.out ++ common begin a addr= 5356480 ++ common begin b addr= 5356488 ++ ++``` ++ ++ ++ ++##### 例4-`[no]records` ++ ++​ norecords将记录结构中派生类型和字段的组件对齐到**任意字节边界**上,不带填充。 ++​ align记录选项要求在没有SEQUENCE语句的记录结构和派生类型结构中的多个数据项通过根据需要添加填充自然对齐。 ++ ++```fortran ++program main ++ implicit none ++ type demo ++ logical(kind=1) :: c ++ real(kind=4) :: b ++ integer(kind=8) :: a(3) ++ end type ++ type(demo) :: test ++ write (*,*) 'location c=', loc(test%c) ++ write (*,*) 'location b=', loc(test%b) ++ write (*,*) 'location a=', loc(test%a) ++end program main ++ ++``` ++ ++ ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location c= 5001712 ++ location b= 5001716 ++ location a= 5001720 ++huwei@dell:~/exp$ ifort demo.f90 -align norecords ++demo.f90(2): warning #6379: The structure contains one or more misaligned fields. 
[DEMO] ++ type demo ++---------^ ++huwei@dell:~/exp$ ./a.out ++ location c= 5001712 ++ location b= 5001713 ++ location a= 5001717 ++huwei@dell:~/exp$ ifort demo.f90 -align records ++huwei@dell:~/exp$ ./a.out ++ location c= 5001712 ++ location b= 5001716 ++ location a= 5001720 ++ ++``` ++ ++##### 例5-` rec[n]byte` ++ ++​ 将派生类型的组件和记录结构中的字段,对齐到指定的边界大小(n)与能使其自然对齐的边界这二者中较小的一个上。n取值为1、2、4、8、16、32。n的默认值是8。指定此选项时,第一个结构成员之后的每个结构成员都存储在成员类型的大小或n字节边界上,以较小者为准。例如,要为文件prog1.f中的所有结构和联合指定16字节作为打包边界(或对齐约束),请使用以下命令: ++ ++`ifort {-align rec16byte | /align:rec16byte} prog1.f` ++ ++此选项不影响common块是自然对齐(naturally aligned)还是packed。 ++ ++```fortran ++program main ++ implicit none ++ type demo ++ logical(kind=1) :: c ++ real(kind=4) :: b ++ integer(kind=8) :: a(3) ++ end type ++ type(demo) :: test ++ write (*,*) 'location c=', loc(test%c) ++ write (*,*) 'location b=', loc(test%b) ++ write (*,*) 'location a=', loc(test%a) ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location c= 5001712 ++ location b= 5001716 ++ location a= 5001720 ++huwei@dell:~/exp$ ifort demo.f90 -align rec2byte ++demo.f90(2): warning #6379: The structure contains one or more misaligned fields. [DEMO] ++ type demo ++---------^ ++huwei@dell:~/exp$ ./a.out ++ location c= 5001712 ++ location b= 5001714 ++ location a= 5001718 ++ ++``` ++ ++ ++ ++##### 例6-`sequence` ++ ++​ 根据当前使用的对齐规则,对**使用SEQUENCE语句声明**的派生类型的组件(已排序的组件)进行对齐。默认对齐规则是在自然边界上对齐未排序的组件。 ++ ++​ `align nosequence`选项要求打包已排序的组件,而不考虑任何其他对齐规则。注意,align none表示align nosequence。 ++ ++​ 如果指定了用于标准检查的选项,则会忽略`align sequence`。 ++ ++```fortran ++program main ++ type :: demo ++ sequence ++ character(kind=1) :: a ++ integer(kind=8) :: b ++ end type ++ type(demo) :: test ++ write (*,*) 'location a=', loc(test%a) ++ write (*,*) 'location b=', loc(test%b) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++demo.f90(2): warning #6379: The structure contains one or more misaligned fields.
[DEMO] ++ type :: demo ++----------------^ ++huwei@dell:~/exp$ ./a.out ++ location a= 5001712 ++ location b= 5001713 ++huwei@dell:~/exp$ ifort demo.f90 -align sequence ++huwei@dell:~/exp$ ./a.out ++ location a= 5001712 ++ location b= 5001720 ++huwei@dell:~/exp$ ifort demo.f90 -align nosequence ++demo.f90(2): warning #6379: The structure contains one or more misaligned fields. [DEMO] ++ type :: demo ++----------------^ ++huwei@dell:~/exp$ ./a.out ++ location a= 5001712 ++ location b= 5001713 ++``` ++ ++ ++ ++ ++ ++ ++ ++ ++ ++#### 3.7.2.选项`-q[no-]opt-dynamic-align` ++ 启用或禁用动态数据对齐优化。 ++ ++```txt ++Syntax ++Linux OS: ++-qopt-dynamic-align ++-qno-opt-dynamic-align ++``` ++ ++- 是否启用动态数据对齐优化。 ++ ++```fortran ++program main ++ implicit none ++ real, dimension (:,:), allocatable :: darray ++ integer :: s1, s2 ++ integer :: i, j ++ print*, "Enter the size of the array:" ++ s1=2 ++ s2=2 ++ ! allocate memory ++ allocate ( darray(s1,s2) ) ++ ++ do i = 1, s1 ++ do j = 1, s2 ++ darray(i,j) = i*j ++ print*, "darray(",i,",",j,") = ", darray(i,j) ++ end do ++ end do ++ ++ write(*,*) 'addr darray(1,1)' , loc( darray(1,1) ) ++ write(*,*) 'addr darray(2,1)' , loc( darray(2,1) ) ++ write(*,*) 'addr darray(1,2)' , loc( darray(1,2) ) ++ write(*,*) 'addr darray(2,2)' , loc( darray(2,2) ) ++ deallocate (darray) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 ) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 6550624 ++ addr darray(2,1) 6550628 ++ addr darray(1,2) 6550632 ++ addr darray(2,2) 6550636 ++huwei@dell:~/exp$ ifort demo.f90 -qno-opt-dynamic-align ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 ) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 28984416 ++ addr darray(2,1) 28984420 ++ addr darray(1,2) 
28984424 ++ addr darray(2,2) 28984428 ++huwei@dell:~/exp$ ifort demo.f90 -qopt-dynamic-align ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 ) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 35599456 ++ addr darray(2,1) 35599460 ++ addr darray(1,2) 35599464 ++ addr darray(2,2) 35599468 ++ ++``` ++ ++ ++ ++ ++ ++#### 3.7.3.选项`-falign-functions` ++ ++- `falign-functions[=n]` ++- ` -fno-align-functions` ++ ​ 告诉编译器将过程(procedures)对齐到最佳字节边界上。 ++ ++```fortran ++program main ++ integer(kind=4) :: a=2 ++ integer(kind=4) :: b=3 ++ integer(kind=4) :: c ++ integer(kind=4) :: d ++ write(*,*) myadd(a,b) ++ write(*,*) mymuli(a,b) ++ write( *, *), 'function-myadd-addr=', loc(myadd) ++ write( *, *) , 'function-mymuli-addr=', loc(testadd) ++end program main ++ ++function myadd(first, second) ++ integer(kind=4):: first , second ++ integer(kind=4):: myadd ++ myadd= first + second ++ return ++end function myadd ++ ++function mymuli(first , second) ++ integer(kind=4):: first , second ++ integer(kind=4):: mymuli ++ mymuli= first*second ++ return ++end function mymuli ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 -falign-functions=32 ++huwei@dell:~/exp$ ./a.out ++ 5 ++ 6 ++ function-myadd-addr= 4209152 ++ function-mymuli-addr= 140733628440064 ++huwei@dell:~/exp$ ifort demo.f90 -falign-functions=64 ++huwei@dell:~/exp$ ./a.out ++ 5 ++ 6 ++ function-myadd-addr= 4209216 ++ function-mymuli-addr= 140727951719808 ++huwei@dell:~/exp$ ifort demo.f90 -fno-align-functions ++huwei@dell:~/exp$ ./a.out ++ 5 ++ 6 ++ function-myadd-addr= 4209120 ++ function-mymuli-addr= 140732705014400 ++``` ++ ++ ++ ++#### 3.7.4.选项`falign-loops` ++ ++​ 将循环对齐为2次幂的字节边界。此功能仅适用于ifort。 ++ ++```fortran ++program main ++ implicit none ++ real, dimension (:,:), allocatable :: darray ++ integer :: s1, s2 ++ integer :: i, j ++ print*, "Enter the size of the array:" ++ s1=2 ++ s2=2 ++ ! 
allocate memory ++ allocate ( darray(s1,s2) ) ++ ++ do i = 1, s1 ++ do j = 1, s2 ++ darray(i,j) = i*j ++ print*, "darray(",i,",",j,") = ", darray(i,j) ++ end do ++ end do ++ ++ write(*,*) 'addr darray(1,1)' , loc( darray(1,1) ) ++ write(*,*) 'addr darray(2,1)' , loc( darray(2,1) ) ++ write(*,*) 'addr darray(1,2)' , loc( darray(1,2) ) ++ write(*,*) 'addr darray(2,2)' , loc( darray(2,2) ) ++ deallocate (darray) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.00000000 ++ darray( 1 , 2 ) = 2.00000000 ++ darray( 2 , 1 ) = 2.00000000 ++ darray( 2 , 2 ) = 4.00000000 ++ addr darray(1,1) 94641249921296 ++ addr darray(2,1) 94641249921300 ++ addr darray(1,2) 94641249921304 ++ addr darray(2,2) 94641249921308 ++huwei@dell:~/exp$ gfortran demo.f90 -falign-loops=32 ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.00000000 ++ darray( 1 , 2 ) = 2.00000000 ++ darray( 2 , 1 ) = 2.00000000 ++ darray( 2 , 2 ) = 4.00000000 ++ addr darray(1,1) 94420423407888 ++ addr darray(2,1) 94420423407892 ++ addr darray(1,2) 94420423407896 ++ addr darray(2,2) 94420423407900 ++huwei@dell:~/exp$ gfortran demo.f90 -falign-loops=128 ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.00000000 ++ darray( 1 , 2 ) = 2.00000000 ++ darray( 2 , 1 ) = 2.00000000 ++ darray( 2 , 2 ) = 4.00000000 ++ addr darray(1,1) 94814366185744 ++ addr darray(2,1) 94814366185748 ++ addr darray(1,2) 94814366185752 ++ addr darray(2,2) 94814366185756 ++ ++``` ++ ++ ++ ++ ++ ++ ++#### 3.7.5.选项`mbranches-within-32B-boundaries` ++ ++ ++​ 告诉编译器在32字节边界上对齐分支和融合分支以获得更好的性能。 ++ ++```fortran ++program main ++ integer(kind=1) :: a ++ integer(kind=2) :: b ++ integer(kind=4) :: c ++ integer(kind=8) :: d ++ ++ write (*,*) 'location a=', loc(a) ++ write (*,*) 'location b=', loc(b) ++ write (*,*) 'location c=', loc(c) ++ write (*,*) 'location d=', loc(d) ++end program 
main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location a= 140730668543150 ++ location b= 140730668543148 ++ location c= 140730668543144 ++ location d= 140730668543128 ++huwei@dell:~/exp$ ifort demo.f90 -mbranches-within-32B-boundaries ++huwei@dell:~/exp$ ./a.out ++ location a= 140721717080110 ++ location b= 140721717080108 ++ location c= 140721717080104 ++ location d= 140721717080088 ++huwei@dell:~/exp$ ifort demo.f90 -mno-branches-within-32B-boundaries ++huwei@dell:~/exp$ ./a.out ++ location a= 140726490923438 ++ location b= 140726490923436 ++ location c= 140726490923432 ++ location d= 140726490923416 ++ ++``` ++ ++ ++ ++​ ++ ++#### 3.7.6.选项`qopt-dynamic-align` ++ ++​ 启用或禁用动态数据对齐优化。 ++ ++```fortran ++program main ++ implicit none ++ real, dimension (:,:), allocatable :: darray ++ integer :: s1, s2 ++ integer :: i, j ++ print*, "Enter the size of the array:" ++ s1=2 ++ s2=2 ++ ! allocate memory ++ allocate ( darray(s1,s2) ) ++ ++ do i = 1, s1 ++ do j = 1, s2 ++ darray(i,j) = i*j ++ print*, "darray(",i,",",j,") = ", darray(i,j) ++ end do ++ end do ++ ++ write(*,*) 'addr darray(1,1)' , loc( darray(1,1) ) ++ write(*,*) 'addr darray(2,1)' , loc( darray(2,1) ) ++ write(*,*) 'addr darray(1,2)' , loc( darray(1,2) ) ++ write(*,*) 'addr darray(2,2)' , loc( darray(2,2) ) ++ deallocate (darray) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ ifort demo.f90 ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 ) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 21222496 ++ addr darray(2,1) 21222500 ++ addr darray(1,2) 21222504 ++ addr darray(2,2) 21222508 ++huwei@dell:~/exp$ ifort demo.f90 qopt-dynamic-align ++ifort: error #10236: File not found: 'qopt-dynamic-align' ++huwei@dell:~/exp$ ifort demo.f90 -qopt-dynamic-align ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 
) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 17265760 ++ addr darray(2,1) 17265764 ++ addr darray(1,2) 17265768 ++ addr darray(2,2) 17265772 ++huwei@dell:~/exp$ ifort demo.f90 -qno-opt-dynamic-align ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 ) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 18261088 ++ addr darray(2,1) 18261092 ++ addr darray(1,2) 18261096 ++ addr darray(2,2) 18261100 ++ ++``` ++ ++ ++ ++### 3.8.硬件或操作系统相关扩展 ++ ++​ 本部分,笔者只做简要介绍,具体的请阅读文档:Intel® Fortran Compiler Classic and Intel® Fortran Compiler Developer Guide and Reference^3^ ++ ++ ++ ++#### 3.8.1.选项`Qsfalign` ++ ++只支持IA-32硬件平台下的WindowsOS,而Linux和MacOS对此均不支持。 ++**作用**:指定函数的堆栈对齐方式。此选项已弃用,将在未来的版本中删除。此功能仅适用于ifort。 ++ ++ ++ ++#### 3.8.2.选项`falign-stack` ++ ++只支持IA-32硬件平台下的Linux,而MacOS和WindowsOS对此均不支持。 ++ ++**作用**:告诉编译器要在例程(routines)入口使用的堆栈对齐方式。此选项已弃用,将在未来的版本中删除。此功能仅适用于ifort。 ++ ++​ ++ ++ ++ ++## 4.总结 ++ ++​ 本文档中部分笔者抽取的Fortran基本内存对齐场景,是对照于C和C++的内存对齐场景而来,旨在为Fortran提供基础的内存对齐,为后续可能的优化提供基础。主要是以下5大场景 ++ ++- 场景1.基本数据类型 ++- 场景2.复合类型 ++ ++- 场景3.函数边界 ++ ++- 场景4.分配函数类:allocate和deallocate的align实现 ++ ++- 场景5.Fortran特有的common块内变量的对齐 ++ ++​ 但对这5大场景,其实gfortran和ifort的支持主要只是实现了基础的根据kind分配内存,并没有进一步的其余对齐。然后针对这一部分的扩展对齐,gfortran支持的特性很少,主要是针对common块内变量对齐进行了调整。而ifort对align则支持特性更多,无论是数组整体的对齐、循环的对齐、common块入口或common块内,module块类、堆栈边界等,都做了相应的一些支持。 ++ ++​ 因而,笔者抽取进一步的Fortran主流编译器可用来指导后续现代Fortran编译器内存对齐可做的一些场景如下: ++ ++- 附加场景1:实现类似alignof的关键词,用于查询某类型or变量or函数的对齐数 ++- 附加场景2:显式地使用选项约束堆栈边界的对齐数 ++- 附加场景3:支持用户指定的结构对齐规则 ++- 附加场景4:common块边界 ++- 附加场景5:循环对齐 ++- 附加场景6:module、type内变量的对齐 ++ ++ ++ ++ ++ ++## 附录 ++ ++### 附录1-gfortran的安装 ++ ++​ 后续不需要进行gfortran的源码分析,我们采用最方便的安装方式,最终采用目前最新^9^的gfortran版本10.4.0 ++ ++```bash ++sudo apt install gfortran-10 ++``` ++ ++- 然后使用linux软件版本管理命令,如`update-alternatives`处理linux系统中软件版本的切换,比如我的Ubuntu下,可如下操作 ++ ++```bash ++root@ubuntu:~/huawei# update-alternatives
--install /usr/bin/gfortran gfortran /usr/bin/gfortran-10 20 ++``` ++ ++ ++ ++ ++ ++### 附录2-ifort的安装 ++ ++- 安装方式^4,5^ ++ ++#### 1.Ubuntu版本GUI版 ++ ++```bash ++whoway@VMwhoway:~/ifortTemp$ cat /proc/version ++Linux version 5.15.0-46-generic (buildd@lcy02-amd64-007) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #49~20.04.1-Ubuntu SMP Thu Aug 4 19:15:44 UTC 2022 ++``` ++ ++ ++ ++#### 2.前置环境配置 ++ ++​ 在进行ifort安装前,Ubuntu内需要先安装一些基本库,不然安装会不成功,在终端中使用以下两条语句完成基本库的配置,注意查看是否下载了这些基本库: ++ ++```bash ++sudo apt update ++sudo apt -y install gcc g++ cmake pkg-config build-essential ++``` ++ ++ ++ ++ ++ ++#### 3.ifort安装过程 ++ ++- 打开终端,输入下面语句进行安装包的下载: ++ - 注:下述的intel的许可来自:https://www.intel.com/content/www/us/en/developer/articles/news/free-intel-software-developer-tools.html^4^ ++ ++```bash ++wget https://registrationcenter-download.intel.com/akdlm/irc_nas/18438/l_HPCKit_p_2022.1.1.97_offline.sh ++``` ++ ++- 打开终端,输入下面语句进行安装: ++ ++```bash ++sudo bash l_HPCKit_p_2022.1.1.97_offline.sh ++``` ++ ++- 界面化安装,和windows下差不多,安装需要的模块即可 ++ ++> 至此,ifort安装成功 ++ ++ ++ ++#### 4.配置环境变量 ++ ++默认安装位置 ++ ++- 对于root用户,默认安装位置在:`/opt/intel/oneapi/` ++ ++- 普通用户安装位置在:`~/intel/oneapi/` ++ ++设置环境变量: ++ ++- 如果是普通用户,我们在`~/.bashrc`中最后一行添加`source ~/intel/oneapi/setvars.sh` ++ - 如果是root用户,我们在`.bashrc`最后另起一行添加`source /opt/intel/oneapi/setvars.sh` ++ ++- 最后一步测试:**新打开一个终端**输入`ifort -v`,有ifort版本号提示即表示安装成功。 ++ ++ ++ ++ ++ ++ ++ ++ ++## 参考资料 ++ ++- [1] Fortran95程序设计,彭国伦 ++- [2] Fortran95/2003程序设计,第3版,Stephen J.Chapman著,刘瑾[译] ++- [3] Intel® Fortran Compiler Classic and Intel® Fortran Compiler Developer Guide and Reference ++- [4] Linux系统中ifort的安装教程及使用方法,附Vmware和Ubuntu系统的安装包,https://www.bilibili.com/video/BV1dS4y1C77J/?spm_id_from=333.999.0.0&vd_source=ea20f1fccee6fd3f1af7d59cd3ae7575 ++- [5] https://www.bilibili.com/read/cv15164219 ++- [6] 介绍https://fortran-lang.org/zh_CN/compilers/ ++- [7]
https://www.intel.com/content/www/us/en/developer/articles/technical/alignment-of-fortran-allocatable-arrays-pointers-in-intel-fortran-compiler.html ++- [8] gfortran源码(随GCC发布):https://ftp.gnu.org/gnu/gcc/gcc-10.4.0/gcc-10.4.0.tar.gz ++- [9] What's new in gfortran?:https://gcc.gnu.org/wiki/GFortran/News ++- [10] https://gcc.gnu.org/onlinedocs/gcc-3.4.1/g77/Optimize-Options.html ++- [11] https://docs.oracle.com/cd/E19957-01/805-4939/c400041360f5/index.html ++- [12] https://www.intel.cn/content/www/cn/zh/search.html?ws=text#q=align&sort=relevancy ++- [13] Using GNU Fortran , For gcc version 10.4.0 +diff --git a/docs/fortran-memory-align-and-classic-flang-memory-align-investigation.md b/docs/fortran-memory-align-and-classic-flang-memory-align-investigation.md +new file mode 100644 +index 0000000..b310cb6 +--- /dev/null ++++ b/docs/fortran-memory-align-and-classic-flang-memory-align-investigation.md +@@ -0,0 +1,1128 @@ ++## classic flang内存对齐能力报告 ++ ++ ++ ++- 版本号:1.0 ++ ++- 文档编辑:胡伟 ++ ++ - last modify:2022.11.22 ++ ++ ++ ++ ++## 目录 ++ ++[TOC] ++ ++​ LLVM官方有一个fortran编译器,原来叫f18,现在叫flang,但是不完善,现在还不可用;2018年Nvidia开源了收购的pgi公司的fortran编译器,后端用的llvm,当时就叫flang,也就是我们本文现在所基于的编译器,现在称为classic flang^1^。classic flang目前是可用的,AMD、Nvidia、ARM、华为毕昇的编译器都是基于这个编译器,但是功能也不完善。 ++ ++​ 本文中使用的术语==Flang==就是Classic Flang。它是 pgfortran(`PGI/NVIDIA` 的商业 Fortran 编译器)的开源版本。它不同于自 2020 年以来一直是 LLVM 项目的一部分的新 Flang(以前称为“F18”;参见`https://flang.llvm.org/`),尽管两者都是由同一个社区开发的^1^。 ++ ++​ 本文首先调研了F2008和F2018标准关于内存对齐的需求,然后仿照C和C++的内存对齐,抽取出了一部分Fortran或许可以提供的内存对齐方式,最后笔者调研了当前最新版的`classic flang15.0.3`关于内存对齐的能力。 ++ ++ ++ ++## 1.Fortran标准关于内存对齐 ++ ++​ 笔者阅读了Fortran的2008和2018^5,6^相关标准,总结如下: ++ ++F2008和F2018对Fortran**语言本身**的内存对齐均没有显式的相关描述^2,4,5^。 ++ ++ ++ ++## 2.抽取Fortran基本的内存对齐场景 ++ ++​ 本部分笔者抽取的Fortran基本内存对齐场景,是对照于C和C++的内存对齐场景而来,旨在为Fortran提供基础的内存对齐,为后续可能的优化提供基础。现描述如下: ++ ++> 场景1.基本数据类型 ++ ++- logical ++- character ++- integer ++- real ++- ==complex== ++ ++> 场景2.复合类型 ++ ++- 第1类:同类型复合 ++ - ==array== ++- 第2类:不同类型复合 ++ - ==type== ++ ++> 场景3.函数边界
++ ++- subroutine ++ ++- function ++ ++> 场景4.分配函数类:allocator和deallocate的align实现 ++ ++- allocate ++ ++- deallocate ++ ++> 场景5:Fortran特有的—common块 ++ ++- common ++ ++​ 此外,笔者调研了gfortran和ifort这些Fortran主流编译器后,扩展的一些可做的内存对齐场景如下^9^: ++ ++- 附加场景1:实现类似alignof的关键词,用于查询某类型or变量or函数的对齐数 ++- 附加场景2:显示的使用选项约束堆栈边界的对齐数 ++- 附加场景3:支持用户指定的结构对齐规则 ++- 附加场景4:common块边界 ++- 附加场景5:循环对齐 ++- 附加场景6:module、type内变量的对齐 ++ ++ ++ ++ ++ ++ ++ ++## 3.`classic flang`内存对齐能力测试 ++ ++- 本部分,采用目前最新的`classic flang15.0.3`进行样例支持。 ++- 本部分的样例,主要以标准Fortran90和Fortran95来展示 ++ ++ ++ ++### 3.1.场景1.基础变量类型 ++ ++ ++ ++#### 例1.f90-logical ++ ++```fortran ++program main ++ implicit none ++ logical(kind=1) :: a ++ logical(kind=2) :: b ++ logical(kind=4) :: c ++ logical(kind=8) :: d ++ print * , kind(a) ++ print * , kind(b) ++ print * , kind(c) ++ print * , kind(d) ++ ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ flang demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 1 ++ 2 ++ 4 ++ 8 ++``` ++ ++> classic flang15.0.3支持logical的kind可为1,2,4,8 ++ ++ ++ ++#### 例2.f90-character ++ ++```fortran ++program main ++ implicit none ++ character(kind=1) :: a ++ character(kind=2) :: b ++ print * , kind(a) ++ print * , kind(b) ++ ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 1 ++ 2 ++``` ++ ++> classic flang15.0.3支持character的kind可为1,2 ++ ++ ++ ++#### 例3.f90-integer ++ ++```fortran ++program main ++ implicit none ++ integer(kind=1) :: a ++ integer(kind=2) :: b ++ integer(kind=4) :: c ++ integer(kind=8) :: d ++ print * , kind(a) ++ print * , kind(b) ++ print * , kind(c) ++ print * , kind(d) ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 1 ++ 2 ++ 4 ++ 8 ++ 16 ++``` ++ ++> classic flang15.0.3支持integer的kind可为1,2,4,8 ++ ++```fortran ++program main ++ integer(kind=1) :: a ++ integer(kind=2) :: b ++ integer(kind=4) :: c ++ integer(kind=8) :: d ++ ++ write (*,*) 'location a=', loc(a) ++ write (*,*) 'location b=', loc(b) ++ 
write (*,*) 'location c=', loc(c) ++ write (*,*) 'location d=', loc(d) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ flang demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location a= 140737487867571 ++ location b= 140737487867568 ++ location c= 140737487867564 ++ location d= 140737487867552 ++``` ++ ++ ++ ++ ++ ++#### 例4-f90-real ++ ++```fortran ++program main ++ implicit none ++ real(kind=4) :: c ++ real(kind=8) :: d ++ print * , kind(c) ++ print * , kind(d) ++end program main ++``` ++ ++```bash ++huwei@dell:~/exp$ gfortran demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 4 ++ 8 ++``` ++ ++> classic flang15.0.3支持real的kind可为4,8 ++ ++ ++ ++#### 例5-f90-complex ++ ++```fortran ++program main ++ implicit none ++ complex(kind=4) :: c ++ complex(kind=8) :: d ++ print * , kind(c) ++ print * , kind(d) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ flang demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 4 ++ 8 ++``` ++ ++> classic flang15.0.3支持complex的kind可为4,8 ++ ++ ++ ++ ++ ++### 3.2.场景2.复合类型 ++ ++#### 例1-f90-数组 ++ ++```fortran ++program main ++ implicit none ++ integer(kind=4) :: test(1000) !修改kind ++ write (*,*) 'location test=', loc(test) ++ write (*,*) 'location test[1]=', loc(test(1)) ++ write (*,*) 'location test[2]=', loc(test(2)) ++end program main ++``` ++ ++```bash ++# 这是integer(kind=2) ++huwei@dell:~/exp$ flang demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location test= 4210848 ++ location test[1]= 4210848 ++ location test[2]= 4210850 ++ ++# 这是integer(kind=4) ++huwei@dell:~/exp$ flang demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location test= 4210848 ++ location test[1]= 4210848 ++ location test[2]= 4210852 ++ ++``` ++ ++ ++ ++#### 例2-f90-type ++ ++```fortran ++program main ++ implicit none ++ type demo ++ integer(kind=4) :: a(3) ++ real(kind=4) :: b ++ logical(kind=1) :: c ++ end type ++ type(demo) :: test ++ write (*,*) 'location c=', loc(test%c) ++ write (*,*) 'location b=', loc(test%b) ++ write (*,*) 'location a=', loc(test%a) ++end program main ++``` ++ ++```bash 
++huwei@dell:~/exp$ flang demo.f90 ++huwei@dell:~/exp$ ./a.out ++ location c= 4210992 ++ location b= 4210988 ++ location a= 4210976 ++ ++``` ++ ++ ++ ++ ++ ++ ++ ++### 3.3.场景3.函数边界 ++ ++​ “函数”是自定义函数和子程序的统称。 ++ ++#### 例1-f90-subroutine ++ ++```fortran ++program main ++ call one() ++ call two() ++ write( *, *), 'subroutine-one-addr=', loc(one) ++ write( *, *) , 'subroutine-two-addr=', loc(two) ++ ++end program main ++ ++subroutine one() ++ integer :: a=3 ++ print * , a ++end subroutine one ++ ++subroutine two() ++ print * , 'two' ++end subroutine two ++``` ++ ++```bash ++huwei@dell:~/exp$ flang demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 3 ++ two ++ subroutine-one-addr= 4199328 ++ subroutine-two-addr= 4199456 ++``` ++ ++ ++ ++#### 例2-f90-function ++ ++```fortran ++program main ++ integer(kind=4) :: a=2 ++ integer(kind=4) :: b=3 ++ integer(kind=4) :: c ++ integer(kind=4) :: d ++ write(*,*) myadd(a,b) ++ write(*,*) mymuli(a,b) ++ write( *, *), 'function-myadd-addr=', loc(myadd) ++ write( *, *) , 'function-mymuli-addr=', loc(testadd) ++end program main ++ ++function myadd(first, second) ++ integer(kind=4):: first , second ++ integer(kind=4):: myadd ++ myadd= first + second ++ return ++end function myadd ++ ++function mymuli(first , second) ++ integer(kind=4):: first , second ++ integer(kind=4):: mymuli ++ mymuli= first*second ++ return ++end function mymuli ++``` ++ ++```bash ++huwei@dell:~/exp$ flang demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 5 ++ 6 ++ function-myadd-addr= 4199584 ++ function-mymuli-addr= 140732352770816 ++ ++``` ++ ++ ++ ++ ++ ++### 3.4.场景4.内存分配函数 ++ ++#### 例1-f90 ++ ++```fortran ++program main ++ implicit none ++ real, dimension (:,:), allocatable :: darray ++ integer :: s1, s2 ++ integer :: i, j ++ print*, "Enter the size of the array:" ++ s1=2 ++ s2=2 ++ ! 
allocate memory ++ allocate ( darray(s1,s2) ) ++ ++ do i = 1, s1 ++ do j = 1, s2 ++ darray(i,j) = i*j ++ print*, "darray(",i,",",j,") = ", darray(i,j) ++ end do ++ end do ++ ++ write(*,*) 'addr darray(1,1)' , loc( darray(1,1) ) ++ write(*,*) 'addr darray(2,1)' , loc( darray(2,1) ) ++ write(*,*) 'addr darray(1,2)' , loc( darray(1,2) ) ++ write(*,*) 'addr darray(2,2)' , loc( darray(2,2) ) ++ deallocate (darray) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/exp$ flang demo.f90 ++huwei@dell:~/exp$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 ) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 15861248 ++ addr darray(2,1) 15861252 ++ addr darray(1,2) 15861256 ++ addr darray(2,2) 15861260 ++ ++``` ++ ++ ++ ++ ++ ++### 3.5.场景5.common ++ ++#### 例1-f90 ++ ++- 如下,源码改造自彭国伦的《Fortran95程序设计》^1^ ++ ++```fortran ++program ex0810 ++ implicit none ++ integer :: a, b ++ common a, b !定义a,b是全局变量中的第1及第2个变量 ++ a=1 ++ b=2 ++ call ShowCommon() ++ write(*,*) , 'common begin a addr=', loc(a) ++ write(*,*) , 'common begin b addr=', loc(b) ++ ++ stop ++ ++end program ex0810 ++ ++subroutine ShowCommon() ++ implicit none ++ integer :: num1, num2 ++ common num1, num2 !定义num1,num2是全局变量中的第1及第2个变量 ++ write(*,*) num1, num2 ++ return ++ ++end subroutine ShowCommon ++``` ++ ++```bash ++huwei@dell:~/exp$ flang demo.f90 ++huwei@dell:~/exp$ ./a.out ++ 1 2 ++ common begin a addr= 4210944 ++ common begin b addr= 4210948 ++FORTRAN STOP ++ ++``` ++ ++​ ++ ++ ++ ++ ++ ++ ++ ++## 4.classic flang15.0.3扩展-引导语 ++ ++> 若要构建classic flang12.0.1或15.0.3,请参考:**附录1-构建classic flang编译环境** ++ ++ ++ ++### 4.1.`assume_aligned` ++ ++​ 形如 ++ ++```fortran ++!dir$ assume_aligned arr:64 ++``` ++ ++​ 使用`assume_aligned` 指令,`assume_aligned`指的是数组的属性,指定数组对齐方式。 ++ ++```fortran ++program main ++ implicit none ++ real, dimension (:,:), allocatable :: darray ++ integer :: s1, s2 ++ integer :: i, j ++ print*, "Enter the size of the array:" ++ s1=2 ++ 
s2=2 ++ allocate ( darray(s1,s2) ) ++ !dir$ assume_aligned darray:64 ++ do i = 1, s1 ++ do j = 1, s2 ++ darray(i,j) = i*j ++ print*, "darray(",i,",",j,") = ", darray(i,j) ++ end do ++ end do ++ ++ write(*,*) 'addr darray(1,1)' , loc( darray(1,1) ) ++ write(*,*) 'addr darray(2,1)' , loc( darray(2,1) ) ++ write(*,*) 'addr darray(1,2)' , loc( darray(1,2) ) ++ write(*,*) 'addr darray(2,2)' , loc( darray(2,2) ) ++ deallocate (darray) ++end program main ++ ++``` ++ ++```bash ++huwei@dell:~/huawei-classic-flang/kmpl-flang/test$ flang demo.f90 ⭐️未加引导语 ++huwei@dell:~/huawei-classic-flang/kmpl-flang/test$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 ) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 12224000 ++ addr darray(2,1) 12224004 ++ addr darray(1,2) 12224008 ++ addr darray(2,2) 12224012 ++huwei@dell:~/huawei-classic-flang/kmpl-flang/test$ vim demo.f90 ⭐️加上引导语 ++huwei@dell:~/huawei-classic-flang/kmpl-flang/test$ flang demo.f90 ++huwei@dell:~/huawei-classic-flang/kmpl-flang/test$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 ) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 31233536 ++ addr darray(2,1) 31233540 ++ addr darray(1,2) 31233544 ++ addr darray(2,2) 31233548 ++``` ++ ++ ++ ++ ++ ++### 4.2.`attributes align` ++ ++- `!DIR$ ATTRIBUTES ALIGN: n:: object` ++ ++​ `ATTRIBUTES ALIGN`指定**变量**以及**派生类型**(derived types)的可分配(allocatable )或指针(pointer)组件的字节对齐方式。 ++ ++```fortran ++program main ++ implicit none ++ real, dimension (:,:), allocatable :: darray ++ integer :: s1, s2 ++ integer :: i, j ++ print*, "Enter the size of the array:" ++ s1=2 ++ s2=2 ++ ! 
allocate memory ++ allocate ( darray(s1,s2) ) ++ !dir$ attributes align : 64 : darray ++ do i = 1, s1 ++ do j = 1, s2 ++ darray(i,j) = i*j ++ print*, "darray(",i,",",j,") = ", darray(i,j) ++ end do ++ end do ++ ++ write(*,*) 'addr darray(1,1)' , loc( darray(1,1) ) ++ write(*,*) 'addr darray(2,1)' , loc( darray(2,1) ) ++ write(*,*) 'addr darray(1,2)' , loc( darray(1,2) ) ++ write(*,*) 'addr darray(2,2)' , loc( darray(2,2) ) ++ deallocate (darray) ++end program main ++ ++``` ++ ++ ++ ++```bash ++huwei@dell:~/huawei-classic-flang/kmpl-flang/test$ flang demo.f90 ⭐️未加引导语 ++huwei@dell:~/huawei-classic-flang/kmpl-flang/test$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 ) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 13596160 ++ addr darray(2,1) 13596164 ++ addr darray(1,2) 13596168 ++ addr darray(2,2) 13596172 ++huwei@dell:~/huawei-classic-flang/kmpl-flang/test$ vim demo.f90 ⭐️加上引导语 ++huwei@dell:~/huawei-classic-flang/kmpl-flang/test$ flang demo.f90 ++huwei@dell:~/huawei-classic-flang/kmpl-flang/test$ ./a.out ++ Enter the size of the array: ++ darray( 1 , 1 ) = 1.000000 ++ darray( 1 , 2 ) = 2.000000 ++ darray( 2 , 1 ) = 2.000000 ++ darray( 2 , 2 ) = 4.000000 ++ addr darray(1,1) 30422528 ++ addr darray(2,1) 30422532 ++ addr darray(1,2) 30422536 ++ addr darray(2,2) 30422540 ++ ++``` ++ ++ ++ ++### 4.3.`vector aligned` ++ ++比如 ++ ++```fortran ++!dir$ vector aligned ++``` ++ ++​ 使用`vector aligned`指令,该语句导致编译器针对所有数组引用使用对齐数据移动指令。 ++ ++​ 本部分样例,抽取自classic flang15.0.3的test文件夹。 ++ ++```fortran ++!dir$ vector aligned ++!dir$ SIMD ++ do icol=1,ncol ++ indbound(icol) = tbound(icol) - 159._r8 ++ if (indbound(icol) .lt. 1) then ++ indbound(icol) = 1 ++ elseif (indbound(icol) .gt. 180) then ++ indbound(icol) = 180 ++ endif ++ tbndfrac(icol) = tbound(icol) - 159._r8 - float(indbound(icol)) ++ indlev0(icol) = tz(icol,0) - 159._r8 ++ if (indlev0(icol) .lt. 
1) then ++ indlev0(icol) = 1 ++ elseif (indlev0(icol) .gt. 180) then ++ indlev0(icol) = 180 ++ endif ++ t0frac(icol) = tz(icol,0) - 159._r8 - float(indlev0(icol)) ++ laytrop(icol) = 0 ++ ++ ! Begin layer loop ++ end do ++``` ++ ++ ++ ++ ++ ++## 5.总结classic flang整体的内存对齐能力 ++ ++​ 整体上来说,classic flang对基础数据类型、复合类型等有基于kind的考量,算是基本的align支持。而classic flang对align其他方面的支持很少,即使是目前最新的classic flang15.0.3,也只有3个相关的引导语。用途类似intel的ifort编译器用法。 ++ ++​ 总的来说,classic flang关于Fortran的内存对齐扩展支持很少,但是classic flang的IR生成部分有align字段,还是可以为后续用户对classic flang做定制化的内存对齐能力做一定的基本支持。 ++ ++ ++ ++ ++ ++## 附录 ++ ++### 附录1-构建classic flang编译环境 ++ ++> 笔者以自己的生产环境(关键信息已进行替换处理)为例,构建classic flang12.0.1编译环境,示范如下 ++ ++Tips:如果想要构建目前最新版的`classic flang15.0.3`,请自行修改llvm和flang仓库如下: ++ ++- classic flang的llvm仓库:https://github.com/flang-compiler/classic-flang-llvm-project ++ - checkout到`release_15x`分支 ++- classic flang的flang仓库:https://github.com/flang-compiler/flang ++ - 默认的master分支 ++ ++ ++ ++ ++ ++#### 1.笔者的硬件环境+操作系统 ++ ++- Architecture:aarch64 ++- OS:Ubuntu 18.04.3 LTS ++ ++```bash ++huwei@complier:~/arm-classic-flang$ uname -a ++Linux complier 4.15.0-70-generic #79-Ubuntu SMP Tue Nov 12 10:36:10 UTC 2019 aarch64 aarch64 aarch64 GNU/Linux ++ ++huwei@complier:~/arm-classic-flang$ head -n 1 /etc/issue ++Ubuntu 18.04.3 LTS \n \l ++``` ++ ++ ++ ++#### 2.主要依赖项安装 ++ ++​ python、高版本的gcc、g++和cmake3.13.4及更高版本等,请联系你的Linux管理员进行安装 ++ ++笔者使用的是: ++ ++- gcc9.4.0 ++- g++9.4.0 ++- cmake3.25.0 ++- Python 2.7.17 ++ ++ ++ ++#### 3.准备源代码 ++ ++新建一个目录,比如`mkdir arm-classic-flang`,然后`cd arm-classic-flang` 进入该目录 ++ ++准备「classic-flang-llvm-project」在如下目录 ++ ++```bash ++huwei@complier:~/arm-classic-flang$ pwd ++/home/huwei/arm-classic-flang ++``` ++ ++操作如下 ++ ++```bash ++git clone https://github.com/flang-compiler/classic-flang-llvm-project ++cd classic-flang-llvm-project ++git checkout release_12x #检出release12版本的llvm ++``` ++ ++仿照上面的方式,准备「flang」在如下目录 ++ ++```bash ++huwei@complier:~/arm-classic-flang$ pwd ++/home/huwei/arm-classic-flang ++``` ++ ++操作如下
++ ++```bash ++git clone https://gitee.com/src-openeuler/flang #获得Gitee上的flang ++cd flang ++tar -xzvf flang-flang_20210324.tar.gz ++mv flang-flang_20210324 ../kmpl-flang ++``` ++ ++ ++ ++#### 4.准备编译脚本 ++ ++##### 🔴build-llvm-project.sh ++ ++在如下目录操作 ++ ++```bash ++huwei@complier:~/arm-classic-flang/classic-flang-llvm-project$ pwd ++/home/huwei/arm-classic-flang/classic-flang-llvm-project ++``` ++ ++- 修改他的整个`build-llvm-project.sh`为下面的方式 ++ ++> 下面的脚本是对GitHub:https://github.com/flang-compiler/classic-flang-llvm-project/blob/release_12x/build-llvm-project.sh上的脚本的进一步改进,支持更多可配置 ++ ++```bash ++ ++#!/bin/bash ++ ++# Initialize our own variables: ++ ++TARGET="X86" ++INSTALL_PREFIX="/usr/local" ++NPROC=1 ++USE_CCACHE="0" ++DO_INSTALL="0" ++USE_SUDO="0" ++C_COMPILER_PATH="/usr/bin/gcc" ++CXX_COMPILER_PATH="/usr/bin/g++" ++BUILD_TYPE="Release" ++ ++set -e # Exit script on first error. ++ ++function print_usage { ++ echo "Usage: ./build-llvm-project.sh [options]"; ++ echo ""; ++ echo "Build and install classic-flang-llvm-project."; ++ echo "Run this script in a directory with project sources."; ++ echo "Example:"; ++ echo " $ git clone https://github.com/flang-compiler/classic-flang-llvm-project"; ++ echo " $ cd classic-flang-llvm-project"; ++ echo " $ .github/workflows/build-llvm-project.sh -t X86 -p /install/prefix/ \\"; ++ echo " $ -a /usr/bin/gcc-10 -b /usr/bin/g++-10 -i -s"; ++ echo ""; ++ echo "Options:"; ++ echo " -t Target to build for (X86, AArch64, PowerPC). Default: X86"; ++ echo " -p Install prefix. Default: /usr/local"; ++ echo " -n Number of parallel jobs. Default: 1"; ++ echo " -c Use ccache. Default: 0 - do not use ccache"; ++ echo " -i Install the build. Default 0 - just build, do not install"; ++ echo " -s Use sudo to install. Default: 0 - do not use sudo"; ++ echo " -a C compiler path. Default: /usr/bin/gcc"; ++ echo " -b C++ compiler path. Default: /usr/bin/g++"; ++ echo " -e Build type. 
Default: Release"; ++} ++while getopts "t:p:n:c?i?s?a:b:e:" opt; do ++ case "$opt" in ++ t) TARGET=$OPTARG;; ++ p) INSTALL_PREFIX=$OPTARG;; ++ n) NPROC=$OPTARG;; ++ c) USE_CCACHE="1";; ++ i) DO_INSTALL="1";; ++ s) USE_SUDO="1";; ++ a) C_COMPILER_PATH=$OPTARG;; ++ b) CXX_COMPILER_PATH=$OPTARG;; ++ e) BUILD_TYPE=$OPTARG;; ++ ?) print_usage; exit 0;; ++ esac ++done ++ ++CMAKE_OPTIONS="-DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX \ ++ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ ++ -DCMAKE_C_COMPILER=$C_COMPILER_PATH \ ++ -DCMAKE_CXX_COMPILER=$CXX_COMPILER_PATH \ ++ -DLLVM_TARGETS_TO_BUILD=$TARGET \ ++ -DLLVM_ENABLE_CLASSIC_FLANG=ON" ++# Warning: the -DLLVM_ENABLE_PROJECTS option is specified with cmake ++# to avoid issues with nested quotation marks ++ ++if [ $USE_CCACHE == "1" ]; then ++ echo "Build using ccache" ++ CMAKE_OPTIONS="$CMAKE_OPTIONS \ ++ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ ++ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache" ++fi ++ ++# Build and install ++mkdir -p build && cd build ++cmake $CMAKE_OPTIONS -DLLVM_ENABLE_PROJECTS="clang;openmp" ../llvm ++make -j$NPROC ++if [ $DO_INSTALL == "1" ]; then ++ if [ $USE_SUDO == "1" ]; then ++ echo "Install with sudo" ++ sudo make install -j$NPROC ++ else ++ echo "Install without sudo" ++ make install -j$NPROC ++ fi ++fi ++cd .. ++``` ++ ++ ++ ++##### 🟡build-flang.sh ++ ++```bash ++huwei@complier:~/arm-classic-flang/kmpl-flang$ pwd ++/home/huwei/arm-classic-flang/kmpl-flang ++``` ++ ++在如下目录操作 ++ ++新建`build-flang.sh`脚本,填充如下 ++ ++> 下面的脚本是对GitHub:https://github.com/flang-compiler/flang/blob/master/build-flang.sh上的脚本的进一步改进,支持更多可配置 ++ ++```bash ++#!/bin/bash ++ ++# A POSIX variable ++ ++OPTIND=1 # Reset in case getopts has been used previously in the shell. ++ ++# Initialize our own variables: ++ ++TARGET="X86" ++INSTALL_PREFIX="/usr/local" ++NPROC=1 ++USE_CCACHE="0" ++USE_SUDO="0" ++C_COMPILER_PATH="$INSTALL_PREFIX/bin/clang" ++CXX_COMPILER_PATH="$INSTALL_PREFIX/bin/clang++" ++BUILD_TYPE="Release" ++ ++set -e # Exit the script on first error. 
++ ++function print_usage { ++ echo "Usage: ./build-flang.sh [options]"; ++ echo ""; ++ echo "Build and install libpgmath and flang."; ++ echo "Run this script in a directory with flang sources."; ++ echo "Example:"; ++ echo " $ git clone https://github.com/flang-compiler/flang"; ++ echo " $ cd flang"; ++ echo " $ .github/workflows/build-flang.sh -t X86 -p /install/prefix/ -n 2 -s"; ++ echo ""; ++ echo "Options:"; ++ echo " -t Target to build for (X86, AArch64, PowerPC). Default: X86"; ++ echo " -p Install prefix. Default: /usr/local"; ++ echo " -n Number of parallel jobs. Default: 1"; ++ echo " -c Use ccache. Default: 0 - do not use ccache"; ++ echo " -s Use sudo to install. Default: 0 - do not use sudo"; ++ echo " -e Build type(Release, Debug). Default: Release"; ++ echo " -a C/C++ compiler(gcc, clang). Default: clang"; ++} ++ ++while getopts "t:p:n:c?s?e:a:" opt; do ++ case "$opt" in ++ t) TARGET=$OPTARG;; ++ p) INSTALL_PREFIX=$OPTARG;; ++ n) NPROC=$OPTARG;; ++ c) USE_CCACHE="1";; ++ s) USE_SUDO="1";; ++ e) BUILD_TYPE=$OPTARG;; ++ a) COMPILER=$OPTARG;; ++ ?) print_usage; exit 0;; ++ esac ++done ++ ++if [ $COMPILER == "gcc" ]; then ++ echo "Build with GCC" ++ C_COMPILER_PATH="/usr/bin/gcc" ++ CXX_COMPILER_PATH="/usr/bin/g++" ++fi ++ ++CMAKE_OPTIONS="-DCMAKE_INSTALL_PREFIX=$INSTALL_PREFIX \ ++ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ ++ -DCMAKE_CXX_COMPILER=$CXX_COMPILER_PATH \ ++ -DCMAKE_C_COMPILER=$C_COMPILER_PATH \ ++ -DLLVM_TARGETS_TO_BUILD=$TARGET" ++ ++if [ $USE_CCACHE == "1" ]; then ++ echo "Build using ccache" ++ CMAKE_OPTIONS="$CMAKE_OPTIONS \ ++ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ ++ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache" ++fi ++ ++# Build and install libpgmath ++ ++cd runtime/libpgmath ++mkdir -p build && cd build ++cmake $CMAKE_OPTIONS .. ++make -j$NPROC ++if [ $USE_SUDO == "1" ]; then ++ echo "Install with sudo" ++ sudo make install -j$NPROC ++else ++ echo "Install without sudo" ++ make install -j$NPROC ++fi ++ ++cd ../../.. 
++ ++# Build and install flang ++ ++mkdir -p build && cd build ++cmake $CMAKE_OPTIONS \ ++ -DCMAKE_Fortran_COMPILER=$INSTALL_PREFIX/bin/flang \ ++ -DCMAKE_Fortran_COMPILER_ID=Flang \ ++ -DFLANG_INCLUDE_DOCS=ON \ ++ -DFLANG_LLVM_EXTENSIONS=ON \ ++ -DWITH_WERROR=OFF \ ++ .. ++make -j$NPROC ++if [ $USE_SUDO == "1" ]; then ++ echo "Install with sudo" ++ sudo make install -j$NPROC ++else ++ echo "Install without sudo" ++ make install -j$NPROC ++fi ++``` ++ ++ ++ ++##### 🟢build.sh ++ ++在如下目录操作 ++ ++```bash ++huwei@complier:~/arm-classic-flang$ pwd ++/home/huwei/arm-classic-flang ++``` ++ ++新建`build.sh`脚本,填充如下 ++ ++> 脚本参考自公司:https://github.com/compiler-dev ++ ++```bash ++#mkdir install dir ++CURDIR=`pwd` ++mkdir install ++INSTALLDIR=$CURDIR/install #设置你的flang最后安装在这个路径,后边给你用于加入PATH ++ ++#build llvm ++cd $CURDIR/classic-flang-llvm-project ++bash build-llvm-project.sh -n 16 -t AArch64 -i -p $INSTALLDIR ++#-t AArch64设置目标平台为AArch64 ++#-n 16是使用16线程编译,影响我们的编译时间 ++ ++#build flang and test ++cd $CURDIR/kmpl-flang ++bash build-flang.sh -n 16 -t AArch64 -p $INSTALLDIR -a gcc ++ ++cp $CURDIR/classic-flang-llvm-project/build/bin/FileCheck ./build/bin ++cp $CURDIR/classic-flang-llvm-project/build/bin/llvm-lit ./build/bin ++ ++cd build ++make check-flang #编译完后,进行回归测试,供你后边开发使用 ++``` ++ ++ ++ ++#### 5.自动化编译和测试 ++ ++在如下目录 ++ ++```bash ++huwei@complier:~/arm-classic-flang$ pwd ++/home/huwei/arm-classic-flang ++huwei@complier:~/arm-classic-flang$ ls ++build.sh classic-flang-llvm-project flang kmpl-flang ++``` ++ ++进行操作 ++ ++```bash ++bash build.sh > build.log 2>&1 ++``` ++ ++该操作会将编译的**任何报错情况**和**最终安装的路径**和**回归测试结果**重定向到build.log,供读者分析 ++ ++ ++ ++ ++ ++#### 6.写bash脚本,进行source,即可使用 ++ ++​ 为了方便后续我们自己或该Linux上其余用户使用你构建的flang,可以写脚本如下:请自行将`/home/huwei/arm-classic-flang/`这样的目录替换成你的相应目录,cat我的sh脚本如下。 ++ ++```bash ++huwei@complier:~$ pwd ++/home/huwei ++huwei@complier:~$ cat myflang.sh ++export LD_LIBRARY_PATH=/home/huwei/arm-classic-flang/install/lib:$LD_LIBRARY_PATH ++export
PATH=/home/huwei/arm-classic-flang/install/bin:$PATH ++``` ++ ++ ++ ++##### 6.1.Linux上其余用户source方式 ++ ++```bash ++whoway@dell:~$ source /home/huwei/myflang.sh ++``` ++ ++类似如上,即可使用我们构建的classic flang ++ ++ ++ ++##### 6.2.方便用户每次新建bash的source方式 ++ ++- 修改`.bashrc`文件即可`vim ~/.bashrc` ++- 在末尾添加`source /home/huwei/myflang.sh `语句 ++ ++至此,后续我们重新登录bash终端,会自动source ++ ++ ++ ++#### 7.查询我们安装的classic flang版本 ++ ++- 注意,我们通过源码方式安装的classic flang ++ ++```bash ++huwei@complier:~/arm-classic-flang$ flang -v ++flang version 12.0.1 (https://github.com/flang-compiler/classic-flang-llvm-project 120b19be0fd37a2ebe39f96a99281519512c8e2f) ++Target: aarch64-unknown-linux-gnu ++Thread model: posix ++InstalledDir: /home/huwei/arm-classic-flang/install/bin ++Found candidate GCC installation: /usr/lib/gcc/aarch64-linux-gnu/10 ++Found candidate GCC installation: /usr/lib/gcc/aarch64-linux-gnu/7 ++Found candidate GCC installation: /usr/lib/gcc/aarch64-linux-gnu/7.5.0 ++Found candidate GCC installation: /usr/lib/gcc/aarch64-linux-gnu/8 ++Found candidate GCC installation: /usr/lib/gcc/aarch64-linux-gnu/9 ++Selected GCC installation: /usr/lib/gcc/aarch64-linux-gnu/10 ++Candidate multilib: .;@m64 ++Selected multilib: .;@m64 ++``` ++ ++ ++ ++ ++ ++## 参考资料 ++ ++- [1] https://github.com/flang-compiler/flang ++- [2] F2008标准,ISO/IEC 1539-1:2010 (E) ++- [3] https://github.com/flang-compiler/classic-flang-llvm-project/blob/release_12x/build-llvm-project.sh ++- [4] https://github.com/flang-compiler/flang/blob/master/build-flang.sh ++- [5] F2018 Interpretation,2018-08-28 WD 1539-1 J3/18-007r1 ++- [6] ISO-IECJTC1-SC22-WG5_N2146_Fortran_2018_Draft_International_Standard_for_Ballot ++- [7] https://developer.arm.com/documentation/101380/2201/Supporting-reference-information/Support-level-definitions ++- [8] https://github.com/compiler-dev ++- [9] 胡伟,Fortran主流编译器(gfortran+ifort)内存对齐辅助能力分析报告 ++ ++ ++ +-- +2.17.1 + diff --git a/flang.spec b/flang.spec index 330967d..549c575 100644 --- a/flang.spec +++ b/flang.spec @@ 
-2,7 +2,7 @@ Name: flang Version: flang_20210324 -Release: 13 +Release: 14 Summary: Fortran language compiler targeting LLVM License: Apache-2.0 @@ -18,6 +18,7 @@ Patch3: 4-add-test-cases-for-openmp-optimization.patch Patch4: 5-test-for-interoperability-with-c-c-call-fortran.patch Patch5: 6-Add-test-cases-for-types.patch Patch6: 7-add-test-cases-for-attribute-declarations-and-specifications.patch +Patch7: 8-add-fortran-and-gfortran-ifort-classic-flang-memory-align-investigation.patch %description Flang depends on a fork of the LLVM project (https://github.com/flang-compiler/classic-flang-llvm-project). The fork made some changes to the upstream LLVM project to support Flang toolchain. Flang cannot build independently for now. @@ -39,6 +40,9 @@ TODO: support build Flang. %changelog +* Mon Dec 5 2022 huwei - flang_20210324-14 +- Add 8-add-fortran-and-gfortran-ifort-classic-flang-memory-align-investigation.patch for fortran memory investigation + * Fri Nov 4 2022 xieyihui - flang_20210324-13 - Fix 4-add-test-cases-for-openmp-optimization.patch for add new test cases for OpenMP optimization -- Gitee