💻 実装例
1. テキスト内の機密データ検出
from google.cloud import dlp_v2
def inspect_text(project_id, text):
    """Inspect a string with the DLP API and print each finding to stdout.

    Args:
        project_id: Project ID used to build the ``projects/{project_id}``
            parent resource of the request.
        text: The string sent as the content item to inspect.

    Returns:
        None. One group of lines is printed per finding, or a single
        message when the response contains no findings.
    """
    dlp_client = dlp_v2.DlpServiceClient()

    # infoTypes the request asks the service to look for.
    wanted_info_types = [
        {"name": type_name}
        for type_name in (
            "CREDIT_CARD_NUMBER",
            "EMAIL_ADDRESS",
            "PHONE_NUMBER",
            "US_SOCIAL_SECURITY_NUMBER",
        )
    ]

    request = {
        "parent": f"projects/{project_id}",
        "inspect_config": {
            "info_types": wanted_info_types,
            "min_likelihood": dlp_v2.Likelihood.POSSIBLE,
            # Presumably required so that finding.quote (printed below) is
            # populated -- confirm against the InspectConfig reference.
            "include_quote": True,
        },
        "item": {"value": text},
    }
    findings = dlp_client.inspect_content(request=request).result.findings

    # Guard clause: nothing was reported for this text.
    if not findings:
        print("機密データは検出されませんでした")
        return

    separator = "-" * 50
    for hit in findings:
        print(f"InfoType: {hit.info_type.name}")
        print(f"Likelihood: {hit.likelihood.name}")
        print(f"Quote: {hit.quote}")
        print(f"Location: {hit.location.byte_range}")
        print(separator)
# Sample input for inspect_text(): a customer record containing a name, an
# e-mail address, a phone number, a credit card number and an SSN.
text = """
顧客情報:
名前: 山田太郎
メール: yamada@example.com
電話: 090-1234-5678
クレジットカード: 4111-1111-1111-1111
SSN: 123-45-6789
"""
# NOTE(review): "my-project-id" looks like a placeholder project ID --
# replace it with a real project before running.
inspect_text("my-project-id", text)
2. データの非識別化(マスキング)
from google.cloud import dlp_v2
def deidentify_with_mask(project_id, text):
    """Return ``text`` after the DLP API has masked the configured infoTypes.

    Args:
        project_id: Project ID used to build the ``projects/{project_id}``
            parent resource of the request.
        text: The string sent as the content item to de-identify.

    Returns:
        The ``item.value`` of the de-identify response.
    """
    dlp_client = dlp_v2.DlpServiceClient()

    # The same infoType list drives both detection (inspect_config) and the
    # masking transformation.
    masked_info_types = [
        {"name": type_name}
        for type_name in ("CREDIT_CARD_NUMBER", "EMAIL_ADDRESS", "PHONE_NUMBER")
    ]

    mask_transformation = {
        "character_mask_config": {
            "masking_character": "*",
            # NOTE(review): 0 presumably means "mask every character" per the
            # CharacterMaskConfig reference -- confirm.
            "number_to_mask": 0,
            "reverse_order": False,
        }
    }

    request = {
        "parent": f"projects/{project_id}",
        "deidentify_config": {
            "info_type_transformations": {
                "transformations": [
                    {
                        "info_types": masked_info_types,
                        "primitive_transformation": mask_transformation,
                    }
                ]
            }
        },
        "inspect_config": {"info_types": masked_info_types},
        "item": {"value": text},
    }

    result = dlp_client.deidentify_content(request=request)
    return result.item.value
# Sample input for deidentify_with_mask(): a customer record containing a
# name, an e-mail address, a phone number and a credit card number.
original_text = """
顧客: 山田太郎
メール: yamada@example.com
電話: 090-1234-5678
カード: 4111-1111-1111-1111
"""
# NOTE(review): "my-project-id" looks like a placeholder project ID --
# replace it with a real project before running.
masked_text = deidentify_with_mask("my-project-id", original_text)
# Print the input and the returned text one after the other for comparison.
print("元のテキスト:")
print(original_text)
print("\nマスキング後:")
print(masked_text)
3. Cloud Storageのスキャン
from google.cloud import dlp_v2
def inspect_gcs_file(
    project_id,
    bucket_name,
    file_name,
    *,
    findings_dataset_id="dlp_results",
    findings_table_id="findings",
):
    """Create a DLP inspect job for one Cloud Storage object.

    The job is configured with a ``save_findings`` action whose output table
    is ``{project_id}.{findings_dataset_id}.{findings_table_id}``. The
    dataset and table IDs were previously hard-coded; they are now
    keyword-only parameters whose defaults keep the original values.

    Args:
        project_id: Project ID used for the ``projects/{project_id}`` parent
            resource and as the project of the findings output table.
        bucket_name: Bucket part of the ``gs://{bucket_name}/{file_name}``
            URL to scan.
        file_name: Object path part of that URL.
        findings_dataset_id: Dataset ID of the findings output table.
        findings_table_id: Table ID of the findings output table.

    Returns:
        The ``name`` of the job returned by ``create_dlp_job``.
    """
    client = dlp_v2.DlpServiceClient()

    storage_config = {
        "cloud_storage_options": {
            "file_set": {"url": f"gs://{bucket_name}/{file_name}"}
        }
    }

    inspect_config = {
        "info_types": [
            {"name": "CREDIT_CARD_NUMBER"},
            {"name": "EMAIL_ADDRESS"},
            {"name": "PHONE_NUMBER"},
        ],
        "min_likelihood": dlp_v2.Likelihood.POSSIBLE,
    }

    # Single action: persist findings to the configured output table.
    actions = [
        {
            "save_findings": {
                "output_config": {
                    "table": {
                        "project_id": project_id,
                        "dataset_id": findings_dataset_id,
                        "table_id": findings_table_id,
                    }
                }
            }
        }
    ]

    response = client.create_dlp_job(
        request={
            "parent": f"projects/{project_id}",
            "inspect_job": {
                "inspect_config": inspect_config,
                "storage_config": storage_config,
                "actions": actions,
            },
        }
    )
    print(f"ジョブ作成: {response.name}")
    return response.name
4. BigQueryテーブルのスキャン
def inspect_bigquery_table(
    project_id,
    dataset_id,
    table_id,
    *,
    rows_limit=10000,
    topic_id="dlp-findings",
):
    """Create a DLP inspect job for a BigQuery table.

    The job is configured with a ``pub_sub`` action targeting
    ``projects/{project_id}/topics/{topic_id}``. The row limit and topic ID
    were previously hard-coded; they are now keyword-only parameters whose
    defaults keep the original values.

    Args:
        project_id: Project ID used for the ``projects/{project_id}`` parent
            resource, the table reference and the Pub/Sub topic path.
        dataset_id: Dataset ID of the table to scan.
        table_id: Table ID of the table to scan.
        rows_limit: Value sent as ``big_query_options.rows_limit``.
        topic_id: Pub/Sub topic ID used in the ``pub_sub`` action.

    Returns:
        The ``name`` of the job returned by ``create_dlp_job``.
    """
    client = dlp_v2.DlpServiceClient()

    storage_config = {
        "big_query_options": {
            "table_reference": {
                "project_id": project_id,
                "dataset_id": dataset_id,
                "table_id": table_id,
            },
            # NOTE(review): RANDOM_START combined with rows_limit presumably
            # scans up to rows_limit rows from a random starting point --
            # confirm against the BigQueryOptions reference.
            "sample_method": dlp_v2.BigQueryOptions.SampleMethod.RANDOM_START,
            "rows_limit": rows_limit,
        }
    }

    inspect_config = {
        "info_types": [
            {"name": "EMAIL_ADDRESS"},
            {"name": "CREDIT_CARD_NUMBER"},
            {"name": "PHONE_NUMBER"},
        ],
        "min_likelihood": dlp_v2.Likelihood.LIKELY,
    }

    # Single action: a Pub/Sub action pointing at the configured topic.
    actions = [
        {"pub_sub": {"topic": f"projects/{project_id}/topics/{topic_id}"}}
    ]

    response = client.create_dlp_job(
        request={
            "parent": f"projects/{project_id}",
            "inspect_job": {
                "inspect_config": inspect_config,
                "storage_config": storage_config,
                "actions": actions,
            },
        }
    )
    print(f"BigQueryスキャンジョブ開始: {response.name}")
    return response.name