问题描述
我希望使用 Foundry 中的 API 检索数据集的记录数和列数。我发现其中一个似乎能显示记录数的 API 是 “.../monocle/api/table/stats”,但我不知道如何把数据集的 rid 传给它。
如能提供任何帮助,我将不胜感激。我的最终目标是获取我所管理的所有数据集的总列数、记录数和大小,以便使用 Quiver 或 Slate 构建仪表板,展示我们在 Foundry 平台内管理的数据量。
解决方法
您可以使用以下示例代码来计算数据集的统计量:
import time
import requests
from urllib.parse import quote_plus
import json
def calculate_dataset_stats(
    token: str,
    dataset_rid: str,
    branch='master',
    api_base='https://foundry-stack.com',
    poll_interval: float = 0.5,
    max_wait=None,
) -> dict:
    """Calculate statistics for the last transaction of a dataset in a branch.

    Starts the calculation with a POST to the foundry-stats endpoint, then
    polls the same endpoint with GET until the reported status is either
    ``SUCCEEDED`` or ``FAILED``.

    Args:
        token: Bearer token sent in the ``authorization`` header.
        dataset_rid: The dataset rid.
        branch: Branch of the dataset; it is URL-encoded before being placed
            in the request path.
        api_base: Base URL of the Foundry stack.
        poll_interval: Seconds to sleep between two polling requests.
        max_wait: Optional upper bound, in seconds, on the time spent
            polling. ``None`` (the default) polls until a terminal status
            is reported.

    Returns:
        The dictionary stored under
        ``result -> succeededDatasetResult -> stats`` of the final response.

    Raises:
        requests.HTTPError: If any request returns an HTTP error status.
        ValueError: If the calculation ends with a status other than
            ``SUCCEEDED``.
        TimeoutError: If ``max_wait`` is set and elapses before a terminal
            status is reported.
    """
    stats_url = (
        f"{api_base}/foundry-stats/api/stats/datasets/"
        f"{dataset_rid}/branches/{quote_plus(branch)}"
    )
    # Polls hit the same authenticated endpoint as the trigger request, so
    # every request carries the same headers, including the bearer token.
    headers = {
        'content-type': "application/json",
        'authorization': f"Bearer {token}",
    }

    start_stats_calculation = requests.post(stats_url, headers=headers)
    start_stats_calculation.raise_for_status()
    view = start_stats_calculation.json()['view']
    # The view returned by the trigger request identifies which calculation
    # the polling requests ask about.
    params = {
        'endTransactionRid': view['endTransactionRid'],
        'schemaId': view['schemaId'],
    }

    deadline = None if max_wait is None else time.monotonic() + max_wait
    while True:
        response = requests.get(stats_url, headers=headers, params=params)
        response.raise_for_status()
        maybe_stats = response.json()
        if maybe_stats['status'] in ('SUCCEEDED', 'FAILED'):
            # Terminal status: leave immediately instead of sleeping first.
            break
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f'Stats Calculation for dataset {dataset_rid} did not finish '
                f'within {max_wait} seconds.'
            )
        time.sleep(poll_interval)

    if maybe_stats['status'] != 'SUCCEEDED':
        raise ValueError(f'Stats Calculation failed for dataset {dataset_rid}. '
                         f'Failure handling not implemented.')
    return maybe_stats['result']['succeededDatasetResult']['stats']
# Example usage: replace the placeholder token and dataset rid with real
# values before running.
token = "eyJwb..."
dataset_rid = "ri.foundry.main.dataset.14703427-09ab-4c9c-b036-1234b34d150b"

# Uses the function's default branch and API base URL.
stats = calculate_dataset_stats(token=token, dataset_rid=dataset_rid)

# Pretty-print the returned statistics dictionary as JSON.
print(json.dumps(stats, indent=4))