最近在學習 CUDA。我是在〔系統名缺失〕系統下的〔開發環境名缺失〕中運行《CUDA by Example》一書中的程序示例時遇到的問題。安裝的〔庫名缺失〕版本為 .4.12，按照一般配置步驟配置好後，執行程序出現以下報錯：「無法定位程序輸入點〔名稱缺失〕於動態鏈接庫〔名稱缺失〕.dll」。該 dll 文件確實在目錄下，之前配置的時候環境變量也加了。不知道在路徑已加進去、文件也存在的情況下，為什麼還會出現這種情況；不管我把這個文件複製到哪個路徑下，執行都會報這個錯。有遇到過同樣問題的前輩可以指點一下嗎？
環境變量配置好了:
內的庫目錄配置,附加依賴項名字也都加進去了:
程序代碼（就是《CUDA by Example》書中第六章的例子）：
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation.
* Any use, reproduction, disclosure, or distribution of this software
* and related documentation without an express license agreement from
* NVIDIA Corporation is strictly prohibited.
*
* Please refer to the applicable NVIDIA end user license agreement (EULA)
* associated with this source code for terms and conditions that govern
* your use of this NVIDIA software.
*
*/
#include "cuda.h"
#include "D:/exercise/cuda_demo/common/book.h"
#include "D:/exercise/cuda_demo/common/image.h"
// Simulation constants.
// DIM: passed as both constructor arguments of the IMAGE in main and used as
// the row length when the kernels compute neighbour offsets.
#define DIM 1024
// PI is not referenced anywhere in the code visible in this file.
#define PI 3.1415926535897932f
// MAX_TEMP / MIN_TEMP: the values main writes into the initial temperature
// grids.
#define MAX_TEMP 1.0f
#define MIN_TEMP 0.0001f
// SPEED: scale factor applied to the neighbour difference in blend_kernel.
#define SPEED 0.25f
// these exist on the GPU side
// File-scope texture references; main binds them to dev_constSrc, dev_inSrc
// and dev_outSrc respectively, and the kernels read them with tex1Dfetch.
// NOTE(review): the texture reference API is a legacy CUDA feature - confirm
// the installed toolkit still supports it.
texture<float> texConstSrc;
texture<float> texIn;
texture<float> texOut;
// Performs one update step over the 2-d grid of floats.
//
// Every thread handles one cell: it fetches the cell and its four nearest
// neighbours from the texture selected by dstOut and stores
//     c + SPEED * (t + b + r + l - 4 * c)
// into dst. On a grid edge the missing neighbour is replaced by the cell
// itself.
//
//   dst    - buffer that receives the updated cell values
//   dstOut - true: read from texIn; false: read from texOut
//
// Neighbour offsets use DIM as the row length while the cell index uses
// blockDim.x * gridDim.x, so the launch is expected to span exactly DIM
// threads per row (main launches DIM/16 x DIM/16 blocks of 16 x 16 threads).
__global__ void blend_kernel(float* dst,
                             bool dstOut) {
    // map from threadIdx/blockIdx to pixel position and linear cell index
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;
    const int self = col + row * blockDim.x * gridDim.x;

    // neighbour indices, clamped to the cell itself on the borders
    const int west  = (col == 0)       ? self : self - 1;
    const int east  = (col == DIM - 1) ? self : self + 1;
    const int north = (row == 0)       ? self : self - DIM;
    const int south = (row == DIM - 1) ? self : self + DIM;

    // texture references cannot be chosen through a pointer, so each fetch
    // selects its source explicitly
    const float t = dstOut ? tex1Dfetch(texIn, north) : tex1Dfetch(texOut, north);
    const float l = dstOut ? tex1Dfetch(texIn, west)  : tex1Dfetch(texOut, west);
    const float c = dstOut ? tex1Dfetch(texIn, self)  : tex1Dfetch(texOut, self);
    const float r = dstOut ? tex1Dfetch(texIn, east)  : tex1Dfetch(texOut, east);
    const float b = dstOut ? tex1Dfetch(texIn, south) : tex1Dfetch(texOut, south);

    dst[self] = c + SPEED * (t + b + r + l - 4 * c);
}
// Copies the non-zero cells of the constant-source texture into iptr.
//
// Every thread reads its own cell from texConstSrc; a non-zero value
// overwrites the matching element of iptr, a zero value leaves it untouched.
//
//   iptr - buffer that receives the non-zero source values
//
// NOTE - a texture offset (texOffsetConstSrc) could either be passed as a
// parameter to this function, or passed in __constant__ memory. If it were
// declared as a global above, the signature would instead be:
//     __global__ void copy_const_kernel( float *iptr, size_t texOffset )
__global__ void copy_const_kernel(float* iptr) {
    // map from threadIdx/blockIdx to pixel position and linear cell index
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;
    const int cell = col + row * blockDim.x * gridDim.x;

    const float source = tex1Dfetch(texConstSrc, cell);
    if (source == 0)
        return;  // nothing to copy for this cell
    iptr[cell] = source;
}
// State shared by the setup code, the per-frame update and cleanup().
struct DataBlock {
    // buffer allocated with cudaMalloc in main; written by float_to_color and
    // copied back into the host image every frame
    unsigned char* output_bitmap;
    // the two buffers main alternates between as kernel input and output
    float* dev_inSrc;
    float* dev_outSrc;
    // buffer bound to texConstSrc; main fills it with the fixed sources
    float* dev_constSrc;
    // host-side image that receives each rendered frame
    IMAGE* bitmap;
    // events recorded at the beginning and end of every frame
    cudaEvent_t start;
    cudaEvent_t stop;
    // sum of the per-frame elapsed times reported by cudaEventElapsedTime
    float totalTime;
    // number of frames rendered so far (kept as float for the average)
    float frames;
};
// Per-frame animation hook. It is an empty stub in this file: the frame
// loop is written inline in main, so both arguments are ignored.
void anim_gpu(DataBlock* d, int ticks) {
    (void)d;      // intentionally unused
    (void)ticks;  // intentionally unused
}
// Clean up everything that was set up on the GPU for the simulation.
//
// Unbinds the three texture references, frees all four cudaMalloc'd buffers
// referenced by d (including output_bitmap, which was previously leaked) and
// destroys the two timing events. Every call is checked through HANDLE_ERROR,
// matching the error-handling style of the rest of the file.
//
//   d - block whose device pointers and events were initialised by main;
//       the pointers must not be used after this call.
void cleanup(DataBlock * d) {
    // unbind before freeing so no texture reference points at released memory
    HANDLE_ERROR(cudaUnbindTexture(texIn));
    HANDLE_ERROR(cudaUnbindTexture(texOut));
    HANDLE_ERROR(cudaUnbindTexture(texConstSrc));

    HANDLE_ERROR(cudaFree(d->dev_inSrc));
    HANDLE_ERROR(cudaFree(d->dev_outSrc));
    HANDLE_ERROR(cudaFree(d->dev_constSrc));
    // allocated with cudaMalloc in main alongside the buffers above
    HANDLE_ERROR(cudaFree(d->output_bitmap));

    HANDLE_ERROR(cudaEventDestroy(d->start));
    HANDLE_ERROR(cudaEventDestroy(d->stop));
}

// Entry point: sets up the simulation, then renders frames until the key
// code returned by IMAGE::show_image is 27.
//
// Steps:
//   1. create the timing events and allocate the device buffers,
//   2. bind the three float buffers to the texture references,
//   3. build the constant-source grid and the initial input grid on the host
//      and upload them,
//   4. per frame: run 90 copy_const_kernel/blend_kernel steps, alternating
//      the input and output buffers, convert the result with float_to_color,
//      copy it into the host image, and print the running average frame time.
//
// Returns 0 on normal exit and 1 if the host staging buffer cannot be set up.
int main(void) {
    DataBlock data;
    IMAGE bitmap_image(DIM, DIM);
    data.bitmap = &bitmap_image;
    data.totalTime = 0;
    data.frames = 0;
    HANDLE_ERROR(cudaEventCreate(&data.start));
    HANDLE_ERROR(cudaEventCreate(&data.stop));

    int imageSize = bitmap_image.image_size();

    HANDLE_ERROR(cudaMalloc((void**)&data.output_bitmap, imageSize));
    // assume float == 4 chars in size (ie rgba)
    HANDLE_ERROR(cudaMalloc((void**)&data.dev_inSrc, imageSize));
    HANDLE_ERROR(cudaMalloc((void**)&data.dev_outSrc, imageSize));
    HANDLE_ERROR(cudaMalloc((void**)&data.dev_constSrc, imageSize));

    HANDLE_ERROR(cudaBindTexture(NULL, texConstSrc, data.dev_constSrc, imageSize));
    HANDLE_ERROR(cudaBindTexture(NULL, texIn, data.dev_inSrc, imageSize));
    HANDLE_ERROR(cudaBindTexture(NULL, texOut, data.dev_outSrc, imageSize));

    // The host buffer below is indexed as DIM * DIM floats, so verify the
    // size assumption instead of silently writing past the allocation.
    if (imageSize < 0 ||
        (size_t)imageSize < (size_t)DIM * DIM * sizeof(float)) {
        fprintf(stderr, "image_size() = %d is too small for %d x %d floats\n",
                imageSize, DIM, DIM);
        cleanup(&data);
        return 1;
    }

    // intialize the constant data
    float* temp = (float*)malloc(imageSize);
    if (temp == NULL) {
        fprintf(stderr, "Host memory allocation of %d bytes failed\n", imageSize);
        cleanup(&data);
        return 1;
    }
    for (int i = 0; i < DIM * DIM; i++) {
        temp[i] = 0;
        int x = i % DIM;
        int y = i / DIM;
        if ((x > 300) && (x < 600) && (y > 310) && (y < 601))
            temp[i] = MAX_TEMP;
    }
    temp[DIM * 100 + 100] = (MAX_TEMP + MIN_TEMP) / 2;
    temp[DIM * 700 + 100] = MIN_TEMP;
    temp[DIM * 300 + 300] = MIN_TEMP;
    temp[DIM * 200 + 700] = MIN_TEMP;
    for (int y = 800; y < 900; y++) {
        for (int x = 400; x < 500; x++) {
            temp[x + y * DIM] = MIN_TEMP;
        }
    }
    HANDLE_ERROR(cudaMemcpy(data.dev_constSrc, temp, imageSize,
                            cudaMemcpyHostToDevice));

    // initialize the input data (the constant grid plus one extra region)
    for (int y = 800; y < DIM; y++) {
        for (int x = 0; x < 200; x++) {
            temp[x + y * DIM] = MAX_TEMP;
        }
    }
    HANDLE_ERROR(cudaMemcpy(data.dev_inSrc, temp, imageSize,
                            cudaMemcpyHostToDevice));
    free(temp);

    // one thread per cell: DIM/16 x DIM/16 blocks of 16 x 16 threads
    const dim3 blocks(DIM / 16, DIM / 16);
    const dim3 threads(16, 16);
    IMAGE* bitmap = data.bitmap;

    bitmap_image.show_image(30);
    for (;;) {
        HANDLE_ERROR(cudaEventRecord(data.start, 0));

        // since tex is global and bound, we have to use a flag to
        // select which is in/out per iteration
        // NOTE(review): volatile is kept from the original listing; the
        // reason for it is not evident from this file.
        volatile bool dstOut = true;
        for (int i = 0; i < 90; i++) {
            float* in;
            float* out;
            if (dstOut) {
                in = data.dev_inSrc;
                out = data.dev_outSrc;
            } else {
                out = data.dev_inSrc;
                in = data.dev_outSrc;
            }
            copy_const_kernel<<<blocks, threads>>>(in);
            HANDLE_ERROR(cudaGetLastError());  // report a failed launch
            blend_kernel<<<blocks, threads>>>(out, dstOut);
            HANDLE_ERROR(cudaGetLastError());
            dstOut = !dstOut;
        }

        // 90 is even, so the latest result is back in dev_inSrc here
        float_to_color<<<blocks, threads>>>(data.output_bitmap, data.dev_inSrc);
        HANDLE_ERROR(cudaGetLastError());
        HANDLE_ERROR(cudaMemcpy(bitmap->get_ptr(), data.output_bitmap,
                                bitmap->image_size(),
                                cudaMemcpyDeviceToHost));

        HANDLE_ERROR(cudaEventRecord(data.stop, 0));
        HANDLE_ERROR(cudaEventSynchronize(data.stop));
        float elapsedTime;
        HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime, data.start, data.stop));
        data.totalTime += elapsedTime;
        ++data.frames;
        printf("Average Time per frame: %3.1f ms\n",
               data.totalTime / data.frames);

        char key = bitmap_image.show_image(30);
        if (key == 27) {
            break;
        }
    }

    cleanup(&data);
    return 0;
}