久久久久久久久久久熟女AV,国产91在线|日韩,国内一级毛片

欄目導航

新聞資訊

新聞資訊

最近在學習CUDA，我是在系統下運行《CUDA by Example》一書中的程序示例時遇到的問題。安裝的.4.12，按照一般配置步驟配置好后，執行程序出現報錯：“無法定位程序輸入點于動態鏈接庫.dll”。該dll文件就在目錄下，之前配置的時候環境變量也加了。不知道在路徑已加進去、文件也存在的情況下，為啥會出現這種情況：不管我把這個文件復制到哪個路徑下，執行都會報這個錯。有遇到過同樣問題的前輩可以指點一下嘛~

環境變量配置好了：

內的庫目錄配置，附加依賴項名字也都加進去了：

程序代碼：（就是《CUDA by Example》書中第六章例子）

/*
 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual property and
 * proprietary rights in and to this software and related documentation.
 * Any use, reproduction, disclosure, or distribution of this software
 * and related documentation without an express license agreement from
 * NVIDIA Corporation is strictly prohibited.
 *
 * Please refer to the applicable NVIDIA end user license agreement (EULA)
 * associated with this source code for terms and conditions that govern
 * your use of this NVIDIA software.
 *
 */
#include "cuda.h"
#include "D:/exercise/cuda_demo/common/book.h"
#include "D:/exercise/cuda_demo/common/image.h"
// Width and height of the simulated grid and of the displayed image:
// main() constructs the IMAGE as DIM x DIM and the kernels clamp
// neighbour lookups at 0 and DIM - 1.
#define DIM 1024
// NOTE(review): PI is not referenced anywhere in the visible code.
#define PI 3.1415926535897932f
// Temperature values main() writes into the host grid before uploading it.
#define MAX_TEMP 1.0f
#define MIN_TEMP 0.0001f
// Weight applied to the neighbour difference in blend_kernel's update step.
#define SPEED   0.25f
// These exist on the GPU side. main() binds them to dev_constSrc,
// dev_inSrc and dev_outSrc respectively; the kernels read them with
// tex1Dfetch().
// NOTE(review): texture references (texture<T> + cudaBindTexture) are a
// legacy API that newer CUDA toolkits no longer provide - confirm the
// toolkit version this file is built with.
texture<float>  texConstSrc;
texture<float>  texIn;
texture<float>  texOut;
// Performs one update step on a DIM x DIM grid of floats.
//
// Each thread handles one cell. It reads the cell (c) and its four nearest
// neighbours (t, l, r, b) through a texture reference and stores
//     c + SPEED * (t + b + r + l - 4 * c)
// into dst. On a grid edge the missing neighbour is replaced by the cell
// itself, so no fetch ever leaves the grid.
//
// Parameters:
//   dst    - output buffer indexed as x + y * DIM
//   dstOut - true: read the current state from texIn;
//            false: read it from texOut
//
// Launch layout: 2-D grid of 2-D blocks supplying at least DIM x DIM
// threads; threads outside the grid return without touching memory.
// Reads the file-level texture references texIn / texOut, which must be
// bound before the launch.
__global__ void blend_kernel(float* dst,
    bool dstOut) {
    // map from threadIdx/BlockIdx to pixel position
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    // Guard against launches whose grid overshoots the DIM x DIM image.
    if (x >= DIM || y >= DIM) return;

    // Use DIM as the row stride so the flat index agrees with the
    // +/- DIM arithmetic used for the top and bottom neighbours below.
    const int offset = x + y * DIM;

    int left = offset - 1;
    int right = offset + 1;
    if (x == 0)   left++;          // no left neighbour: reuse this cell
    if (x == DIM - 1) right--;     // no right neighbour: reuse this cell

    int top = offset - DIM;
    int bottom = offset + DIM;
    if (y == 0)   top += DIM;      // no row above: reuse this cell
    if (y == DIM - 1) bottom -= DIM; // no row below: reuse this cell

    float   t, l, c, r, b;
    if (dstOut) {
        t = tex1Dfetch(texIn, top);
        l = tex1Dfetch(texIn, left);
        c = tex1Dfetch(texIn, offset);
        r = tex1Dfetch(texIn, right);
        b = tex1Dfetch(texIn, bottom);
    }
    else {
        t = tex1Dfetch(texOut, top);
        l = tex1Dfetch(texOut, left);
        c = tex1Dfetch(texOut, offset);
        r = tex1Dfetch(texOut, right);
        b = tex1Dfetch(texOut, bottom);
    }
    dst[offset] = c + SPEED * (t + b + r + l - 4 * c);
}
// NOTE - texOffsetConstSrc could either be passed as a
// parameter to this function, or passed in __constant__ memory
// if we declared it as a global above, it would be
// a parameter here: 
// __global__ void copy_const_kernel( float *iptr,

//                                    size_t texOffset )
// Copies every non-zero entry of the texConstSrc texture into iptr.
//
// Each thread derives a flat index from its 2-D position, using the total
// thread count in x (blockDim.x * gridDim.x) as the row stride, fetches the
// value at that index from texConstSrc and, when it is non-zero, stores it
// at the same index of iptr. Zero entries leave iptr untouched.
//
// Parameters:
//   iptr - buffer that receives the non-zero values
__global__ void copy_const_kernel(float* iptr) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int idx = row * blockDim.x * gridDim.x + col;

    const float fixedValue = tex1Dfetch(texConstSrc, idx);
    if (fixedValue == 0) {
        return;  // nothing to copy for this cell
    }
    iptr[idx] = fixedValue;
}
// State shared between the frame loop in main() and cleanup().
struct DataBlock {
    // Buffer allocated with cudaMalloc in main(); float_to_color writes
    // into it and it is then copied back to the host image.
    unsigned char* output_bitmap;
    // Buffers allocated with cudaMalloc in main(); dev_inSrc and dev_outSrc
    // alternate as input/output of blend_kernel, dev_constSrc backs the
    // texConstSrc texture.
    float* dev_inSrc;
    float* dev_outSrc;
    float* dev_constSrc;
    // Host-side image object that main() displays.
    IMAGE* bitmap;
    // Events recorded at the beginning and end of each frame.
    cudaEvent_t start;
    cudaEvent_t stop;
    // Sum of the elapsed times reported by cudaEventElapsedTime.
    float totalTime;
    // Number of frames rendered so far (divisor for the average time).
    float frames;
};
// Empty stub: accepts a DataBlock and a tick count but performs no work.
// In this file the per-frame logic is written inline in main().
void anim_gpu(DataBlock* d, int ticks) {
    (void)d;      // intentionally unused
    (void)ticks;  // intentionally unused
}
// Cleans up the GPU resources referenced by a DataBlock.
//
// Unbinds the three texture references, frees all four device buffers
// (output_bitmap, dev_inSrc, dev_outSrc, dev_constSrc) and destroys the two
// timing events. Every CUDA call is checked with HANDLE_ERROR, matching
// the rest of the file.
//
// Parameters:
//   d - block whose device pointers and events were set up by main()
void cleanup(DataBlock * d) {
    // Unbind before freeing so no texture still refers to freed memory.
    HANDLE_ERROR(cudaUnbindTexture(texIn));
    HANDLE_ERROR(cudaUnbindTexture(texOut));
    HANDLE_ERROR(cudaUnbindTexture(texConstSrc));
    // output_bitmap is allocated with cudaMalloc in main() and was
    // previously never released.
    HANDLE_ERROR(cudaFree(d->output_bitmap));
    HANDLE_ERROR(cudaFree(d->dev_inSrc));
    HANDLE_ERROR(cudaFree(d->dev_outSrc));
    HANDLE_ERROR(cudaFree(d->dev_constSrc));
    HANDLE_ERROR(cudaEventDestroy(d->start));
    HANDLE_ERROR(cudaEventDestroy(d->stop));
}

// Entry point: runs the simulation and displays it until show_image()
// returns 27 (the ASCII code of ESC).
//
// Steps:
//   1. create the timing events, allocate the device buffers and bind the
//      three float buffers to the texture references;
//   2. build the constant grid and the initial grid on the host and upload
//      them to dev_constSrc and dev_inSrc;
//   3. per frame: run 90 rounds of copy_const_kernel + blend_kernel while
//      swapping dev_inSrc / dev_outSrc, convert the result to colours with
//      float_to_color, copy it to the host image, report the average frame
//      time and show the image;
//   4. release the GPU resources with cleanup().
//
// Returns 0 on normal exit, 1 if the host staging buffer cannot be
// allocated.
int main(void) {
    DataBlock   data;
    IMAGE bitmap_image(DIM, DIM);
    data.bitmap = &bitmap_image;
    data.totalTime = 0;
    data.frames = 0;
    HANDLE_ERROR(cudaEventCreate(&data.start));
    HANDLE_ERROR(cudaEventCreate(&data.stop));

    int imageSize = bitmap_image.image_size();
    HANDLE_ERROR(cudaMalloc((void**)&data.output_bitmap,
        imageSize));
    // assume float == 4 chars in size (ie rgba)
    HANDLE_ERROR(cudaMalloc((void**)&data.dev_inSrc,
        imageSize));
    HANDLE_ERROR(cudaMalloc((void**)&data.dev_outSrc,
        imageSize));
    HANDLE_ERROR(cudaMalloc((void**)&data.dev_constSrc,
        imageSize));
    HANDLE_ERROR(cudaBindTexture(NULL, texConstSrc,
        data.dev_constSrc,
        imageSize));
    HANDLE_ERROR(cudaBindTexture(NULL, texIn,
        data.dev_inSrc,
        imageSize));
    HANDLE_ERROR(cudaBindTexture(NULL, texOut,
        data.dev_outSrc,
        imageSize));

    // initialize the constant data
    float* temp = (float*)malloc(imageSize);
    if (temp == NULL) {
        fprintf(stderr, "Host allocation of %d bytes failed\n", imageSize);
        return 1;
    }
    for (int i = 0; i < DIM * DIM; i++) {
        temp[i] = 0;
        int x = i % DIM;
        int y = i / DIM;
        if ((x > 300) && (x < 600) && (y > 310) && (y < 601))
            temp[i] = MAX_TEMP;
    }
    temp[DIM * 100 + 100] = (MAX_TEMP + MIN_TEMP) / 2;
    temp[DIM * 700 + 100] = MIN_TEMP;
    temp[DIM * 300 + 300] = MIN_TEMP;
    temp[DIM * 200 + 700] = MIN_TEMP;
    for (int y = 800; y < 900; y++) {
        for (int x = 400; x < 500; x++) {
            temp[x + y * DIM] = MIN_TEMP;
        }
    }
    HANDLE_ERROR(cudaMemcpy(data.dev_constSrc, temp,
        imageSize,
        cudaMemcpyHostToDevice));

    // initialize the input data (constant grid plus one extra hot region)
    for (int y = 800; y < DIM; y++) {
        for (int x = 0; x < 200; x++) {
            temp[x + y * DIM] = MAX_TEMP;
        }
    }
    HANDLE_ERROR(cudaMemcpy(data.dev_inSrc, temp,
        imageSize,
        cudaMemcpyHostToDevice));
    free(temp);

    // One thread per cell: (DIM/16) x (DIM/16) blocks of 16 x 16 threads.
    // The configuration never changes, so it is built once.
    const dim3 blocks(DIM / 16, DIM / 16);
    const dim3 threads(16, 16);

    bitmap_image.show_image(30);
    while (1)
    {
        HANDLE_ERROR(cudaEventRecord(data.start, 0));

        // since tex is global and bound, we have to use a flag to
        // select which is in/out per iteration
        bool dstOut = true;
        for (int i = 0; i < 90; i++)
        {
            float* in;
            float* out;
            if (dstOut)
            {
                in = data.dev_inSrc;
                out = data.dev_outSrc;
            }
            else
            {
                out = data.dev_inSrc;
                in = data.dev_outSrc;
            }
            copy_const_kernel<<<blocks, threads>>>(in);
            HANDLE_ERROR(cudaGetLastError());
            blend_kernel<<<blocks, threads>>>(out, dstOut);
            HANDLE_ERROR(cudaGetLastError());
            dstOut = !dstOut;
        }
        // 90 is even, so the latest state ends up back in dev_inSrc.
        float_to_color<<<blocks, threads>>>(data.output_bitmap,
            data.dev_inSrc);
        HANDLE_ERROR(cudaGetLastError());

        HANDLE_ERROR(cudaMemcpy(data.bitmap->get_ptr(),
            data.output_bitmap,
            data.bitmap->image_size(),
            cudaMemcpyDeviceToHost));
        HANDLE_ERROR(cudaEventRecord(data.stop, 0));
        HANDLE_ERROR(cudaEventSynchronize(data.stop));

        float   elapsedTime;
        HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime,
            data.start, data.stop));
        data.totalTime += elapsedTime;
        ++data.frames;
        printf("Average Time per frame:  %3.1f ms\n",
            data.totalTime / data.frames);

        char key = bitmap_image.show_image(30);
        if (key == 27)
        {
            break;
        }
    }
    cleanup(&data);
    return 0;
}

欧美vvv,亚洲第一成人在线,亚洲成人欧美日韩在线观看,日本猛少妇猛色XXXXX猛叫