LIBINT  2.6.0
VRR_GTG_1d_xx_xx_vec.h
1 /*
2  * Copyright (C) 2004-2019 Edward F. Valeev
3  *
4  * This file is part of Libint.
5  *
6  * Libint is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Libint is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with Libint. If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 #ifndef _libint2_src_lib_libint_vrrgtg1dxxxx_h_
22 #define _libint2_src_lib_libint_vrrgtg1dxxxx_h_
23 //#include "../../../../SIMD_wrapped_vector/template/simd_wrapped_vector.hpp"
24 #include <cstdlib>
25 #include <cassert>
26 #include <libint2.h>
27 #include <util_types.h>
28 
29 namespace libint2 {
30 
38  template <unsigned int CartesianAxis, int La, int Lb, int Lc, int Ld, bool vectorize>
39  struct VRR_GTG_1d_xx_xx {
40 
41  static void compute(const Libint_t* inteval,
42  VectorSIMD<double,npts>* target,
43  VectorSIMD<double,npts>* src0) {
44 
45  enum XYZ {x=0, y=1, z=2};
46  assert(CartesianAxis == x || CartesianAxis == y || CartesianAxis == z);
47  //assert(vectorize == false);
48 
49  const unsigned int veclen = vectorize ? inteval->veclen : 1;
50 
51  // corner case: (00|00)
52  if (La == 0 && Lb == 0 && Lc == 0 && Ld == 0) {
53  for (unsigned int v=0; v!=veclen; ++v)
54  target[v] = src0[v];
55  return;
56  }
57 
58  //---------------------------------------------
59  // Part (1): build (a+b 0|c+d 0)
60  //---------------------------------------------
61 
62  VectorSIMD<double,npts> apb_0_GTG_cpd_0[La+Lb+1][Lc+Ld+1];
63  apb_0_GTG_cpd_0[0][0] = src0[0];
64 
65  const VectorSIMD<double,npts> *pfac0_0, *pfac0_1;
66  const VectorSIMD<double,npts> *pfac1_0 = inteval->R12kG12_pfac1_0;
67  const VectorSIMD<double,npts> *pfac1_1 = inteval->R12kG12_pfac1_1;
68  const VectorSIMD<double,npts> *pfac2 = inteval->R12kG12_pfac2;
69  switch (CartesianAxis) {
70  case x:
71  pfac0_0 = inteval->R12kG12_pfac0_0_x;
72  pfac0_1 = inteval->R12kG12_pfac0_1_x;
73  break;
74  case y:
75  pfac0_0 = inteval->R12kG12_pfac0_0_y;
76  pfac0_1 = inteval->R12kG12_pfac0_1_y;
77  break;
78  case z:
79  pfac0_0 = inteval->R12kG12_pfac0_0_z;
80  pfac0_1 = inteval->R12kG12_pfac0_1_z;
81  break;
82  default: assert(false);
83  }
84 
85  // build (0 0|1 0)
86  if (Lc+Ld > 0) {
87  apb_0_GTG_cpd_0[0][1] = pfac0_1[0] * apb_0_GTG_cpd_0[0][0];
88 #if LIBiINT2_FLOP_COUNT
89  inteval->nflops[0] += 1;
90 #endif
91  }
92 
93  // build (0 0|c+d 0)
94  if (Lc+Ld > 1) {
95  for(int c_plus_d=1; c_plus_d!=Lc+Ld; ++c_plus_d) {
96  apb_0_GTG_cpd_0[0][c_plus_d+1] = pfac0_1[0] * apb_0_GTG_cpd_0[0][c_plus_d] +
97  c_plus_d * pfac1_1[0] * apb_0_GTG_cpd_0[0][c_plus_d-1];
98  }
99 #if LIBINT2_FLOP_COUNT
100  inteval->nflops[0] += 4*(Lc+Ld-1);
101 #endif
102  }
103 
104  // build (1 0|0 0)
105  if (La+Lb > 0) {
106  apb_0_GTG_cpd_0[1][0] = pfac0_0[0] * apb_0_GTG_cpd_0[0][0];
107 #if LIBINT2_FLOP_COUNT
108  inteval->nflops[0] += 1;
109 #endif
110  }
111 
112  // build (a+b 0|0 0)
113  if (La+Lb > 1) {
114  for(int a_plus_b=1; a_plus_b!=La+Lb; ++a_plus_b) {
115  apb_0_GTG_cpd_0[a_plus_b+1][0] = pfac0_0[0] * apb_0_GTG_cpd_0[a_plus_b][0] +
116  a_plus_b * pfac1_0[0] * apb_0_GTG_cpd_0[a_plus_b-1][0];
117  }
118 #if LIBINT2_FLOP_COUNT
119  inteval->nflops[0] += 4*(La+Lb-1);
120 #endif
121  }
122 
123  // build (1 0|c+d 0)
124  if (La+Lb > 0 && Lc+Ld > 0) {
125  for(int c_plus_d=1; c_plus_d<=Lc+Ld; ++c_plus_d) {
126  apb_0_GTG_cpd_0[1][c_plus_d] = pfac0_0[0] * apb_0_GTG_cpd_0[0][c_plus_d] +
127  c_plus_d * pfac2[0] * apb_0_GTG_cpd_0[0][c_plus_d-1];
128  }
129 #if LIBINT2_FLOP_COUNT
130  inteval->nflops[0] += 4*(Lc+Ld-1);
131 #endif
132  }
133 
134  // build (a+b 0|c+d 0)
135  if (La+Lb > 1 && Lc+Ld > 0) {
136  for(int a_plus_b=1; a_plus_b!=La+Lb; ++a_plus_b) {
137  for(int c_plus_d=1; c_plus_d<=Lc+Ld; ++c_plus_d) {
138  apb_0_GTG_cpd_0[a_plus_b+1][c_plus_d] = pfac0_0[0] * apb_0_GTG_cpd_0[a_plus_b][c_plus_d] +
139  a_plus_b * pfac1_0[0] * apb_0_GTG_cpd_0[a_plus_b-1][c_plus_d] +
140  c_plus_d * pfac2[0] * apb_0_GTG_cpd_0[a_plus_b][c_plus_d-1];
141  }
142  }
143 #if LIBINT2_FLOP_COUNT
144  inteval->nflops[0] += 7*(La+Lb-1)*(Lc+Ld-1);
145 #endif
146  }
147 
148  //---------------------------------------------
149  // Part (2): build (a b|c+d 0)
150  //---------------------------------------------
151 
152  double AB[1];
153  switch (CartesianAxis) {
154  case x:
155  std::cout<<"printing before segfault"<<std::endl;
156  AB[0] = inteval->AB_x[0];
157  break;
158  case y:
159  AB[0] = inteval->AB_y[0];
160  break;
161  case z:
162  AB[0] = inteval->AB_z[0];
163  break;
164  default: assert(false);
165  }
166 
167  VectorSIMD<double,npts> a_b_GTG_cpd_0[La+1][Lb+1][Lc+Ld+1];
168  for(int c_plus_d=0; c_plus_d<=Lc+Ld; ++c_plus_d) {
169  // copy (a+b 0| to a local 0,a+b buffer
170  VectorSIMD<double,npts> b_a_GTG[La+Lb+1][La+Lb+1];
171  for(int a_plus_b=0; a_plus_b<=La+Lb; ++a_plus_b) {
172  b_a_GTG[0][a_plus_b] = apb_0_GTG_cpd_0[a_plus_b][c_plus_d];
173  }
174  // use HRR to compute b,a
175  for(int b=1; b<=Lb; ++b) {
176  for(int a=0; a<=La+Lb-b; ++a) {
177  b_a_GTG[b][a] = b_a_GTG[b-1][a+1] + AB[0] * b_a_GTG[b-1][a];
178  }
179 #if LIBINT2_FLOP_COUNT
180  inteval->nflops[0] += 2 * (La+Lb-b+1);
181 #endif
182  }
183  // copy b,a to (a b|
184  for(int b=0; b<=Lb; ++b) {
185  for(int a=0; a<=La; ++a) {
186  a_b_GTG_cpd_0[a][b][c_plus_d] = b_a_GTG[b][a];
187  }
188  }
189  }
190 
191  //---------------------------------------------
192  // Part (3): build (a b|c d)
193  //---------------------------------------------
194 
195  double CD[1];
196  switch (CartesianAxis) {
197  case x:
198  CD[0] = inteval->CD_x[0];
199  break;
200  case y:
201  CD[0] = inteval->CD_y[0];
202  break;
203  case z:
204  CD[0] = inteval->CD_z[0];
205  break;
206  default: assert(false);
207  }
208 
209  VectorSIMD<double,npts>* target_a_b_blk_ptr = target;
210  const int Nd = (Ld+1);
211  const int Ncd = (Lc+1)*Nd;
212  for(int a=0; a<=La; ++a) {
213  for(int b=0; b<=Lb; ++b, target_a_b_blk_ptr+=Ncd) {
214  // copy |c+d 0) to a local 0,c+d buffer
215  VectorSIMD<double,npts> d_c_GTG[Lc+Ld+1][Lc+Ld+1];
216  for(int c_plus_d=0; c_plus_d<=Lc+Ld; ++c_plus_d) {
217  d_c_GTG[0][c_plus_d] = a_b_GTG_cpd_0[a][b][c_plus_d];
218  }
219  // use HRR to compute d,c
220  for(int d=1; d<=Ld; ++d) {
221  for(int c=0; c<=Lc+Ld-d; ++c) {
222  d_c_GTG[d][c] = d_c_GTG[d-1][c+1] + CD[0] * d_c_GTG[d-1][c];
223  }
224 #if LIBINT2_FLOP_COUNT
225  inteval->nflops[0] += 2 * (Lc+Ld-d+1);
226 #endif
227  }
228  // copy d,c to |c d)
229  for(int d=0; d<=Ld; ++d) {
230  for(int c=0, cd=d; c<=Lc; ++c, cd+=Nd) {
231  target_a_b_blk_ptr[cd] = d_c_GTG[d][c];
232  }
233  }
234  }
235  }
236 
237  // done
238  }
239 
240  };
241 
242 };
243 
244 #endif // header guard
245 
Defaults definitions for various parameters assumed by Libint.
Definition: algebra.cc:24