复制对象指针数组时,CUDA cudaMemcpu分段错误

问题描述

我试图将一个指针数组移动到设备,其中每个指针都指向一个类对象。但是,我在使用cudamemcpy的那一行出现了细分错误。我正在尝试遵循此post中使用的内容

main.cu

class YourSpider(Spider):
    books = {}
    ...

    def parse(self,response):
        # Get book info here.
        book_item = {
            'fullurl' : fullurl,'url' : url,'title' : title,'authors' : authors,'genres' : genres,'status' : status,'release' : release,'summary' : summary,'chapters' : []
        } 
        self.books[book_item['title']] = book_item
        chapter_urls = [..list of chapter urls]
        chapter_url = chapter_urls.pop()
        
        # This will trigger multiple request async
        for chapter_url in chapter_urls:
            yield scrapy.Request(
                url=chapter_url,callback=self.parse_chapter,Meta={'book': book}
            )

    def parse_chapter(self,response):
        book_title = response.Meta['book_title']

        # parse chapter data here

        chapter = {
            'title' : title,'content' : content,'book_url': self.book,'url' : response.url.split("/")[-2]
        }
        self.books[book_title].append(chapter)

        yield self.books[book_title]
         

testclass.cu

#include "testclass.cuh"
#include <iostream>


__global__ void printtest(Test* test){
    printf("HELLO FROM CUDA\n");
    printf("CUDA1 : %i\n",test->hello);
    Test test2(6);
    printf("CUDA2 : %i\n",test2.hello);
    printf("BYEEE FROM CUDA\n");

}

int main(){
    printf("hello\n");
    Test* test = new Test(512);
    printf("cpu : %i\n",test->hello);
    Test* devtest;
    cudamalloc(&devtest,sizeof(Test));
    cudaError_t err = cudamemcpy(devtest,test,sizeof(Test),cudamemcpyHostToDevice);
    if (err != cudaSuccess) {                                   
        fprintf(stderr,"Error %s at line %d in file %s\n",cudaGetErrorString(err),__LINE__-3,__FILE__);
    }
    printtest<<<1,1>>>(devtest);
    cudaDeviceSynchronize();



    printf("hello2\n");
    Test** test3 = new Test*[2];
    test3[0] = new Test(12299);
    test3[1] = new Test(234923);
    printf("cpu : %i\n",test3[0]->hello);
    Test** devtest3;
    cudamalloc(&devtest3,2*sizeof(Test*));
    printf("cpu2\n");
    err = cudamemcpy(devtest3[0],test3[0],__FILE__);
    }
    printf("cpu3\n");
    printtest<<<1,1>>>(devtest3[0]);
    cudaDeviceSynchronize();
}

testclass.cuh

#include "testclass.cuh"

__host__ __device__ Test::Test(int in){
    hello = in;
}

解决方法

使用@molbdnilo的注释解决。

main.cu

...
printf("hello2\n");
    Test** test3 = new Test*[2];
    test3[0] = new Test(12299);
    test3[1] = new Test(234923);
    printf("CPU : %i\n",test3[0]->hello);
    Test* devtest3[2];
    cudaMalloc(&devtest3[0],sizeof(Test));
    printf("CPU2\n");
    err = cudaMemcpy(devtest3[0],test3[0],sizeof(Test),cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {                                   
        fprintf(stderr,"Error %s at line %d in file %s\n",cudaGetErrorString(err),__LINE__-3,__FILE__);
    }
    printf("CPU3\n");
    printtest<<<1,1>>>(devtest3[0]);
    cudaDeviceSynchronize();
...