如何在 PMID 列中使用 TF-IDF

问题描述

数据框:

[图片：数据框截图（原图缺失）]

代码

import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2

# Bits assumed per trainable parameter; get_rom() divides by 8 to get bytes.
TF_DEFAULT_BIT_WIDTH = 32


class CustomModel(tf.keras.Model):
    """Keras model that can report the storage size of its trainable weights."""

    def get_rom(self):
        """
        Return model ROM in Mb.

        For every layer that has trainable variables, the element counts of
        those variables are summed, multiplied by ``TF_DEFAULT_BIT_WIDTH``
        bits and divided by 8 to get bytes. The per-layer byte counts are
        then summed and divided by ``1024 ** 2``.

        Returns:
            The total size in units of ``1024 ** 2`` bytes. The result is 0
            when no layer has trainable variables.
        """
        rom_size_per_layer = []
        # NOTE: self.layers is empty for an instance created with a bare
        # CustomModel(). Layers only exist if the model was constructed with
        # them, e.g. by using the object returned from the classmethod
        # CustomModel.from_config(config).
        for layer in self.layers:
            if layer.trainable_variables:
                # The shape object itself has no .numpy() method; turning it
                # into a plain tuple lets np.prod count the elements
                # (np.prod(()) == 1 covers scalar variables).
                trainable_sizes = [
                    np.prod(tuple(var.shape)) for var in layer.trainable_variables
                ]
                trainable_var_num = sum(trainable_sizes)
                # bits -> bytes
                rom_size_per_layer.append(trainable_var_num * TF_DEFAULT_BIT_WIDTH / 8)
        return sum(rom_size_per_layer) / 1024 ** 2

# Reference model: MobileNetV2 with ImageNet weights and no activation on the
# classifier output.
model = MobileNetV2(
    input_shape=(224, 224, 3),
    classes=1000,
    weights="imagenet",
    include_top=True,
    classifier_activation=None,
)
model.compile()
# The input batch has to match the declared input_shape: (batch, 224, 224, 3).
x = np.random.normal(size=(1, 224, 224, 3))
y = model.predict(x)

# create custom model to use get_rom()
# from_config is a classmethod that RETURNS a new model; calling it on an
# empty instance and discarding the result leaves that instance without
# layers. The rebuilt model has the same architecture (and therefore the same
# parameter count) as `model`; its weights are freshly initialised.
custom_model = CustomModel.from_config(model.get_config())

print(custom_model.get_rom())

解决方法

您可以使用 scikit-learn 的 TfidfVectorizer 对 PMID 列进行 TF-IDF 向量化：

from sklearn.feature_extraction.text import TfidfVectorizer

# Create a TF-IDF vectorizer with its default settings.
v = TfidfVectorizer()
# Fit the vectorizer on the PMID column and transform it in one step.
# NOTE(review): TfidfVectorizer expects string documents - if df['PMID'] holds
# integers, convert first with .astype(str); confirm the column dtype.
x = v.fit_transform(df['PMID'])