🔍 なぜこの設計が最適なのか
理由1️⃣: 顧客ごとのデータが連続配置される
| 行キー(辞書順でソート) | 商品ID | 金額 | 実際の日時 |
| --- | --- | --- | --- |
| customer#12345#9223370450809775807 | PROD-789 | ¥15,000 | 2024-11-25 10:30 |
| customer#12345#9223370453809775807 | PROD-456 | ¥8,500 | 2024-11-24 15:20 |
| customer#12345#9223370460809775807 | PROD-123 | ¥12,000 | 2024-11-20 09:15 |
| customer#67890#9223370449809775807 | PROD-321 | ¥5,000 | 2024-11-25 11:00 |
| customer#67890#9223370455809775807 | PROD-654 | ¥25,000 | 2024-11-23 14:30 |
✅ 同じ顧客(customer#12345)のデータが連続して配置される
✅ 顧客IDで範囲スキャンすると、その顧客の全購入履歴を効率的に取得
理由2️⃣: 最新データが常に先頭に配置される
時系列とBigtableでの配置順序
Bigtableでの配置順序(行キーの辞書順)
...#9223370450809775807 ← 先頭(最新)
...#9223370460809775807 ← 末尾(最古)
✅ 逆タイムスタンプにより、最新データが行キーの辞書順で先頭に
✅ 「最新N件取得」が範囲スキャンの最初のN行を取るだけで完了
理由3️⃣: 最新データの取得が超高速
// Half-open key range [rangeStart, rangeEnd) covering every row of customer 12345.
// '$' is the character immediately after '#', so rangeEnd is the first key past the prefix.
String rangeStart = "customer#12345#";
String rangeEnd = "customer#12345$";

// Latest single purchase: only the first row of the range is read.
Query query = Query.create(TABLE_ID).range(rangeStart, rangeEnd).limit(1);

// Latest 10 purchases: same query object and range, with the row limit raised to 10.
query.limit(10);
✅ 範囲スキャンの開始位置が明確
✅ 最初のN行を読むだけで最新N件が取得可能
✅ 全行スキャン不要で、レイテンシが極めて低い
理由4️⃣: ホットスポットを回避
❌ タイムスタンプのみの場合
最新のタイムスタンプ範囲に全ての書き込みが集中
→ 特定のノードに負荷が集中
→ ボトルネックが発生
→
✅ 顧客ID + 逆タイムスタンプ
顧客IDが先頭にあるため書き込みが分散
→ 各ノードに均等に負荷分散
→ スケーラブル
ノードA: 25%
ノードB: 25%
ノードC: 25%
ノードD: 25%
💻 実装例
Java実装
import com.google.api.gax.rpc.ServerStream;
import com.google.cloud.bigtable.data.v2.BigtableDataClient;
import com.google.cloud.bigtable.data.v2.models.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
public class PurchaseHistoryService {
private static final String TABLE_ID = "purchase_history";
private static final String COLUMN_FAMILY = "purchase";
public void savePurchase(String customerId, Purchase purchase) {
long reverseTimestamp = Long.MAX_VALUE - purchase.getTimestamp();
String rowKey = "customer#" + customerId + "#" + reverseTimestamp;
RowMutation mutation = RowMutation.create(TABLE_ID, rowKey)
.setCell(COLUMN_FAMILY, "product_id", purchase.getProductId())
.setCell(COLUMN_FAMILY, "amount", purchase.getAmount())
.setCell(COLUMN_FAMILY, "quantity", purchase.getQuantity());
dataClient.mutateRow(mutation);
}
public Purchase getLatestPurchase(String customerId) {
String prefix = "customer#" + customerId + "#";
Query query = Query.create(TABLE_ID)
.prefix(prefix)
.limit(1);
ServerStream<Row> rows = dataClient.readRows(query);
for (Row row : rows) {
return parseRowToPurchase(row);
}
return null;
}
public List<Purchase> getRecentPurchases(String customerId, int limit) {
String prefix = "customer#" + customerId + "#";
Query query = Query.create(TABLE_ID)
.prefix(prefix)
.limit(limit);
List<Purchase> purchases = new ArrayList<>();
ServerStream<Row> rows = dataClient.readRows(query);
for (Row row : rows) {
purchases.add(parseRowToPurchase(row));
}
return purchases;
}
public List<Purchase> getPurchasesByDateRange(
String customerId, long startTime, long endTime) {
long reverseStart = Long.MAX_VALUE - endTime;
long reverseEnd = Long.MAX_VALUE - startTime;
String startKey = "customer#" + customerId + "#" + reverseStart;
String endKey = "customer#" + customerId + "#" + reverseEnd;
Query query = Query.create(TABLE_ID)
.range(startKey, endKey);
List<Purchase> purchases = new ArrayList<>();
ServerStream<Row> rows = dataClient.readRows(query);
for (Row row : rows) {
purchases.add(parseRowToPurchase(row));
}
return purchases;
}
}
Python実装
from google.cloud import bigtable
import sys
class PurchaseHistoryService:
    """Stores and queries per-customer purchase history in Bigtable.

    Row keys have the form ``customer#<customer_id>#<reverse_timestamp>``,
    where the reverse timestamp is ``(2**63 - 1) - timestamp`` zero-padded to
    19 digits, so that a customer's rows sort newest first.

    ``_parse_row`` is not part of this listing and must be supplied alongside it.
    """

    # Largest signed 64-bit integer, the same value as Java's Long.MAX_VALUE.
    # sys.maxsize is platform-dependent and would produce different keys on
    # 32-bit builds.
    _MAX_INT64 = 2**63 - 1

    def __init__(self, project_id, instance_id):
        """Open the ``purchase_history`` table of the given instance.

        Args:
            project_id: Project passed to ``bigtable.Client``.
            instance_id: Bigtable instance that holds the table.
        """
        client = bigtable.Client(project=project_id)
        instance = client.instance(instance_id)
        self.table = instance.table('purchase_history')
        self.column_family = 'purchase'

    def _prefix_range(self, customer_id):
        """Return ``(start_key, end_key)`` bytes spanning all rows of one customer."""
        start_key = f"customer#{customer_id}#".encode()
        # Append a raw 0xFF byte; "\xff" inside a str would be UTF-8 encoded
        # as the two bytes C3 BF instead.
        return start_key, start_key + b"\xff"

    def save_purchase(self, customer_id, purchase):
        """Write one purchase as a row keyed by customer and reverse timestamp.

        Args:
            customer_id: Customer the purchase belongs to.
            purchase: Mapping with ``timestamp``, ``product_id``, ``amount``
                and ``quantity`` entries.

        Raises:
            ValueError: If the timestamp is outside ``0 .. 2**63 - 1``.
        """
        timestamp = int(purchase['timestamp'])
        if not 0 <= timestamp <= self._MAX_INT64:
            raise ValueError(f"timestamp out of range: {timestamp}")
        reverse_timestamp = self._MAX_INT64 - timestamp
        # Zero-padding keeps lexicographic order equal to numeric order.
        row_key = f"customer#{customer_id}#{reverse_timestamp:019d}"

        row = self.table.direct_row(row_key)
        row.set_cell(self.column_family, 'product_id',
                     purchase['product_id'])
        row.set_cell(self.column_family, 'amount',
                     str(purchase['amount']))
        row.set_cell(self.column_family, 'quantity',
                     str(purchase['quantity']))
        row.commit()

    def get_latest_purchase(self, customer_id):
        """Return the customer's newest purchase, or ``None`` if there is none."""
        newest = self.get_recent_purchases(customer_id, limit=1)
        return newest[0] if newest else None

    def get_recent_purchases(self, customer_id, limit=10):
        """Return up to ``limit`` purchases of the customer, newest first.

        Args:
            customer_id: Customer to look up.
            limit: Maximum number of rows to read.

        Returns:
            List of parsed purchases in row-key order; empty when none exist.
        """
        start_key, end_key = self._prefix_range(customer_id)
        rows = self.table.read_rows(
            start_key=start_key,
            end_key=end_key,
            limit=limit
        )
        return [self._parse_row(row) for row in rows]