Skip to content

Commit b57070f

Browse files
authored
chore: [DLP] update to new sample format (GoogleCloudPlatform#1615)
1 parent 03d5476 commit b57070f

28 files changed

+2073
-1998
lines changed

dlp/src/categorical_stats.php

Lines changed: 127 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,9 @@
2222
* @see https://github.com/GoogleCloudPlatform/php-docs-samples/tree/master/dlp/README.md
2323
*/
2424

25-
// Include Google Cloud dependencies using Composer
26-
require_once __DIR__ . '/../vendor/autoload.php';
27-
28-
if (count($argv) != 8) {
29-
return print("Usage: php categorical_stats.php CALLING_PROJECT DATA_PROJECT TOPIC SUBSCRIPTION DATASET TABLE COLUMN\n");
30-
}
31-
list($_, $callingProjectId, $dataProjectId, $topicId, $subscriptionId, $datasetId, $tableId, $columnName) = $argv;
25+
namespace Google\Cloud\Samples\Dlp;
3226

3327
# [START dlp_categorical_stats]
34-
/**
35-
* Computes risk metrics of a column of data in a Google BigQuery table.
36-
*/
3728
use Google\Cloud\Dlp\V2\DlpServiceClient;
3829
use Google\Cloud\Dlp\V2\RiskAnalysisJobConfig;
3930
use Google\Cloud\Dlp\V2\BigQueryTable;
@@ -45,118 +36,134 @@
4536
use Google\Cloud\Dlp\V2\FieldId;
4637
use Google\Cloud\PubSub\PubSubClient;
4738

48-
/** Uncomment and populate these variables in your code */
49-
// $callingProjectId = 'The project ID to run the API call under';
50-
// $dataProjectId = 'The project ID containing the target Datastore';
51-
// $topicId = 'The name of the Pub/Sub topic to notify once the job completes';
52-
// $subscriptionId = 'The name of the Pub/Sub subscription to use when listening for job';
53-
// $datasetId = 'The ID of the dataset to inspect';
54-
// $tableId = 'The ID of the table to inspect';
55-
// $columnName = 'The name of the column to compute risk metrics for, e.g. "age"';
56-
57-
// Instantiate a client.
58-
$dlp = new DlpServiceClient([
59-
'projectId' => $callingProjectId,
60-
]);
61-
$pubsub = new PubSubClient([
62-
'projectId' => $callingProjectId,
63-
]);
64-
$topic = $pubsub->topic($topicId);
65-
66-
// Construct risk analysis config
67-
$columnField = (new FieldId())
68-
->setName($columnName);
69-
70-
$statsConfig = (new CategoricalStatsConfig())
71-
->setField($columnField);
72-
73-
$privacyMetric = (new PrivacyMetric())
74-
->setCategoricalStatsConfig($statsConfig);
75-
76-
// Construct items to be analyzed
77-
$bigqueryTable = (new BigQueryTable())
78-
->setProjectId($dataProjectId)
79-
->setDatasetId($datasetId)
80-
->setTableId($tableId);
81-
82-
// Construct the action to run when job completes
83-
$pubSubAction = (new PublishToPubSub())
84-
->setTopic($topic->name());
85-
86-
$action = (new Action())
87-
->setPubSub($pubSubAction);
88-
89-
// Construct risk analysis job config to run
90-
$riskJob = (new RiskAnalysisJobConfig())
91-
->setPrivacyMetric($privacyMetric)
92-
->setSourceTable($bigqueryTable)
93-
->setActions([$action]);
94-
95-
// Submit request
96-
$parent = "projects/$callingProjectId/locations/global";
97-
$job = $dlp->createDlpJob($parent, [
98-
'riskJob' => $riskJob
99-
]);
100-
101-
// Listen for job notifications via an existing topic/subscription.
102-
$subscription = $topic->subscription($subscriptionId);
103-
104-
// Poll Pub/Sub using exponential backoff until job finishes
105-
// Consider using an asynchronous execution model such as Cloud Functions
106-
$attempt = 1;
107-
$startTime = time();
108-
do {
109-
foreach ($subscription->pull() as $message) {
110-
if (isset($message->attributes()['DlpJobName']) &&
111-
$message->attributes()['DlpJobName'] === $job->getName()) {
112-
$subscription->acknowledge($message);
113-
// Get the updated job. Loop to avoid race condition with DLP API.
114-
do {
115-
$job = $dlp->getDlpJob($job->getName());
116-
} while ($job->getState() == JobState::RUNNING);
117-
break 2; // break from parent do while
118-
}
119-
}
120-
printf('Waiting for job to complete' . PHP_EOL);
121-
// Exponential backoff with max delay of 60 seconds
122-
sleep(min(60, pow(2, ++$attempt)));
123-
} while (time() - $startTime < 600); // 10 minute timeout
124-
125-
// Print finding counts
126-
printf('Job %s status: %s' . PHP_EOL, $job->getName(), JobState::name($job->getState()));
127-
switch ($job->getState()) {
128-
case JobState::DONE:
129-
$histBuckets = $job->getRiskDetails()->getCategoricalStatsResult()->getValueFrequencyHistogramBuckets();
130-
131-
foreach ($histBuckets as $bucketIndex => $histBucket) {
132-
// Print bucket stats
133-
printf('Bucket %s:' . PHP_EOL, $bucketIndex);
134-
printf(' Most common value occurs %s time(s)' . PHP_EOL, $histBucket->getValueFrequencyUpperBound());
135-
printf(' Least common value occurs %s time(s)' . PHP_EOL, $histBucket->getValueFrequencyLowerBound());
136-
printf(' %s unique value(s) total.', $histBucket->getBucketSize());
137-
138-
// Print bucket values
139-
foreach ($histBucket->getBucketValues() as $percent => $quantile) {
140-
printf(
141-
' Value %s occurs %s time(s).' . PHP_EOL,
142-
$quantile->getValue()->serializeToJsonString(),
143-
$quantile->getCount()
144-
);
39+
/**
40+
* Computes risk metrics of a column of data in a Google BigQuery table.
41+
*
42+
* @param string $callingProjectId The project ID to run the API call under
43+
* @param string $dataProjectId The project ID containing the target BigQuery table
44+
* @param string $topicId The name of the Pub/Sub topic to notify once the job completes
45+
* @param string $subscriptionId The name of the Pub/Sub subscription to use when listening for job notifications
46+
* @param string $datasetId The ID of the dataset to inspect
47+
* @param string $tableId The ID of the table to inspect
48+
* @param string $columnName The name of the column to compute risk metrics for, e.g. "age"
49+
*/
50+
function categorical_stats(
51+
string $callingProjectId,
52+
string $dataProjectId,
53+
string $topicId,
54+
string $subscriptionId,
55+
string $datasetId,
56+
string $tableId,
57+
string $columnName
58+
): void {
59+
// Instantiate a client.
60+
$dlp = new DlpServiceClient([
61+
'projectId' => $callingProjectId,
62+
]);
63+
$pubsub = new PubSubClient([
64+
'projectId' => $callingProjectId,
65+
]);
66+
$topic = $pubsub->topic($topicId);
67+
68+
// Construct risk analysis config
69+
$columnField = (new FieldId())
70+
->setName($columnName);
71+
72+
$statsConfig = (new CategoricalStatsConfig())
73+
->setField($columnField);
74+
75+
$privacyMetric = (new PrivacyMetric())
76+
->setCategoricalStatsConfig($statsConfig);
77+
78+
// Construct items to be analyzed
79+
$bigqueryTable = (new BigQueryTable())
80+
->setProjectId($dataProjectId)
81+
->setDatasetId($datasetId)
82+
->setTableId($tableId);
83+
84+
// Construct the action to run when job completes
85+
$pubSubAction = (new PublishToPubSub())
86+
->setTopic($topic->name());
87+
88+
$action = (new Action())
89+
->setPubSub($pubSubAction);
90+
91+
// Construct risk analysis job config to run
92+
$riskJob = (new RiskAnalysisJobConfig())
93+
->setPrivacyMetric($privacyMetric)
94+
->setSourceTable($bigqueryTable)
95+
->setActions([$action]);
96+
97+
// Submit request
98+
$parent = "projects/$callingProjectId/locations/global";
99+
$job = $dlp->createDlpJob($parent, [
100+
'riskJob' => $riskJob
101+
]);
102+
103+
// Listen for job notifications via an existing topic/subscription.
104+
$subscription = $topic->subscription($subscriptionId);
105+
106+
// Poll Pub/Sub using exponential backoff until job finishes
107+
// Consider using an asynchronous execution model such as Cloud Functions
108+
$attempt = 1;
109+
$startTime = time();
110+
do {
111+
foreach ($subscription->pull() as $message) {
112+
if (isset($message->attributes()['DlpJobName']) &&
113+
$message->attributes()['DlpJobName'] === $job->getName()) {
114+
$subscription->acknowledge($message);
115+
// Get the updated job. Loop to avoid race condition with DLP API.
116+
do {
117+
$job = $dlp->getDlpJob($job->getName());
118+
} while ($job->getState() == JobState::RUNNING);
119+
break 2; // break from parent do while
145120
}
146121
}
122+
printf('Waiting for job to complete' . PHP_EOL);
123+
// Exponential backoff with max delay of 60 seconds
124+
sleep(min(60, pow(2, ++$attempt)));
125+
} while (time() - $startTime < 600); // 10 minute timeout
126+
127+
// Print finding counts
128+
printf('Job %s status: %s' . PHP_EOL, $job->getName(), JobState::name($job->getState()));
129+
switch ($job->getState()) {
130+
case JobState::DONE:
131+
$histBuckets = $job->getRiskDetails()->getCategoricalStatsResult()->getValueFrequencyHistogramBuckets();
132+
133+
foreach ($histBuckets as $bucketIndex => $histBucket) {
134+
// Print bucket stats
135+
printf('Bucket %s:' . PHP_EOL, $bucketIndex);
136+
printf(' Most common value occurs %s time(s)' . PHP_EOL, $histBucket->getValueFrequencyUpperBound());
137+
printf(' Least common value occurs %s time(s)' . PHP_EOL, $histBucket->getValueFrequencyLowerBound());
138+
printf(' %s unique value(s) total.', $histBucket->getBucketSize());
139+
140+
// Print bucket values
141+
foreach ($histBucket->getBucketValues() as $percent => $quantile) {
142+
printf(
143+
' Value %s occurs %s time(s).' . PHP_EOL,
144+
$quantile->getValue()->serializeToJsonString(),
145+
$quantile->getCount()
146+
);
147+
}
148+
}
147149

148-
break;
149-
case JobState::FAILED:
150-
$errors = $job->getErrors();
151-
printf('Job %s had errors:' . PHP_EOL, $job->getName());
152-
foreach ($errors as $error) {
153-
var_dump($error->getDetails());
154-
}
155-
break;
156-
case JobState::PENDING:
157-
printf('Job has not completed. Consider a longer timeout or an asynchronous execution model' . PHP_EOL);
158-
break;
159-
default:
160-
printf('Unexpected job state.');
150+
break;
151+
case JobState::FAILED:
152+
$errors = $job->getErrors();
153+
printf('Job %s had errors:' . PHP_EOL, $job->getName());
154+
foreach ($errors as $error) {
155+
var_dump($error->getDetails());
156+
}
157+
break;
158+
case JobState::PENDING:
159+
printf('Job has not completed. Consider a longer timeout or an asynchronous execution model' . PHP_EOL);
160+
break;
161+
default:
162+
printf('Unexpected job state.');
163+
}
161164
}
162165
# [END dlp_categorical_stats]
166+
167+
// The following 2 lines are only needed to run the samples
168+
require_once __DIR__ . '/../../testing/sample_helpers.php';
169+
\Google\Cloud\Samples\execute_sample(__FILE__, __NAMESPACE__, $argv);

dlp/src/create_inspect_template.php

Lines changed: 56 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -22,75 +22,77 @@
2222
* @see https://github.com/GoogleCloudPlatform/php-docs-samples/tree/master/dlp/README.md
2323
*/
2424

25-
// Include Google Cloud dependencies using Composer
26-
require_once __DIR__ . '/../vendor/autoload.php';
27-
28-
if (count($argv) < 3 || count($argv) > 6) {
29-
return print("Usage: php create_inspect_template.php CALLING_PROJECT TEMPLATE [DISPLAY_NAME] [DESCRIPTION] [MAX_FINDINGS]\n");
30-
}
31-
list($_, $callingProjectId, $templateId, $displayName, $description) = $argv;
32-
$displayName = isset($argv[3]) ? $argv[3] : '';
33-
$description = isset($argv[4]) ? $argv[4] : '';
34-
$maxFindings = isset($argv[5]) ? (int) $argv[5] : 0;
25+
namespace Google\Cloud\Samples\Dlp;
3526

3627
// [START dlp_create_inspect_template]
37-
/**
38-
* Create a new DLP inspection configuration template.
39-
*/
4028
use Google\Cloud\Dlp\V2\DlpServiceClient;
4129
use Google\Cloud\Dlp\V2\InfoType;
4230
use Google\Cloud\Dlp\V2\InspectConfig;
4331
use Google\Cloud\Dlp\V2\InspectTemplate;
4432
use Google\Cloud\Dlp\V2\Likelihood;
4533
use Google\Cloud\Dlp\V2\InspectConfig\FindingLimits;
4634

47-
/** Uncomment and populate these variables in your code */
48-
// $callingProjectId = 'The project ID to run the API call under';
49-
// $templateId = 'The name of the template to be created';
50-
// $displayName = ''; // (Optional) The human-readable name to give the template
51-
// $description = ''; // (Optional) A description for the trigger to be created
52-
// $maxFindings = 0; // (Optional) The maximum number of findings to report per request (0 = server maximum)
53-
54-
// Instantiate a client.
55-
$dlp = new DlpServiceClient();
35+
/**
36+
* Create a new DLP inspection configuration template.
37+
*
38+
* @param string $callingProjectId project ID to run the API call under
39+
* @param string $templateId name of the template to be created
40+
* @param string $displayName (Optional) The human-readable name to give the template
41+
* @param string $description (Optional) A description for the template to be created
42+
* @param int $maxFindings (Optional) The maximum number of findings to report per request (0 = server maximum)
43+
*/
44+
function create_inspect_template(
45+
string $callingProjectId,
46+
string $templateId,
47+
string $displayName = '',
48+
string $description = '',
49+
int $maxFindings = 0
50+
): void {
51+
// Instantiate a client.
52+
$dlp = new DlpServiceClient();
5653

57-
// ----- Construct inspection config -----
58-
// The infoTypes of information to match
59-
$personNameInfoType = (new InfoType())
60-
->setName('PERSON_NAME');
61-
$phoneNumberInfoType = (new InfoType())
62-
->setName('PHONE_NUMBER');
63-
$infoTypes = [$personNameInfoType, $phoneNumberInfoType];
54+
// ----- Construct inspection config -----
55+
// The infoTypes of information to match
56+
$personNameInfoType = (new InfoType())
57+
->setName('PERSON_NAME');
58+
$phoneNumberInfoType = (new InfoType())
59+
->setName('PHONE_NUMBER');
60+
$infoTypes = [$personNameInfoType, $phoneNumberInfoType];
6461

65-
// Whether to include the matching string in the response
66-
$includeQuote = true;
62+
// Whether to include the matching string in the response
63+
$includeQuote = true;
6764

68-
// The minimum likelihood required before returning a match
69-
$minLikelihood = likelihood::LIKELIHOOD_UNSPECIFIED;
65+
// The minimum likelihood required before returning a match
66+
$minLikelihood = likelihood::LIKELIHOOD_UNSPECIFIED;
7067

71-
// Specify finding limits
72-
$limits = (new FindingLimits())
73-
->setMaxFindingsPerRequest($maxFindings);
68+
// Specify finding limits
69+
$limits = (new FindingLimits())
70+
->setMaxFindingsPerRequest($maxFindings);
7471

75-
// Create the configuration object
76-
$inspectConfig = (new InspectConfig())
77-
->setMinLikelihood($minLikelihood)
78-
->setLimits($limits)
79-
->setInfoTypes($infoTypes)
80-
->setIncludeQuote($includeQuote);
72+
// Create the configuration object
73+
$inspectConfig = (new InspectConfig())
74+
->setMinLikelihood($minLikelihood)
75+
->setLimits($limits)
76+
->setInfoTypes($infoTypes)
77+
->setIncludeQuote($includeQuote);
8178

82-
// Construct inspection template
83-
$inspectTemplate = (new InspectTemplate())
84-
->setInspectConfig($inspectConfig)
85-
->setDisplayName($displayName)
86-
->setDescription($description);
79+
// Construct inspection template
80+
$inspectTemplate = (new InspectTemplate())
81+
->setInspectConfig($inspectConfig)
82+
->setDisplayName($displayName)
83+
->setDescription($description);
8784

88-
// Run request
89-
$parent = "projects/$callingProjectId/locations/global";
90-
$template = $dlp->createInspectTemplate($parent, $inspectTemplate, [
91-
'templateId' => $templateId
92-
]);
85+
// Run request
86+
$parent = "projects/$callingProjectId/locations/global";
87+
$template = $dlp->createInspectTemplate($parent, $inspectTemplate, [
88+
'templateId' => $templateId
89+
]);
9390

94-
// Print results
95-
printf('Successfully created template %s' . PHP_EOL, $template->getName());
91+
// Print results
92+
printf('Successfully created template %s' . PHP_EOL, $template->getName());
93+
}
9694
// [END dlp_create_inspect_template]
95+
96+
// The following 2 lines are only needed to run the samples
97+
require_once __DIR__ . '/../../testing/sample_helpers.php';
98+
\Google\Cloud\Samples\execute_sample(__FILE__, __NAMESPACE__, $argv);

0 commit comments

Comments
 (0)