16
16
17
17
package io .cdap .cdap .app .etl .gcp ;
18
18
19
+ import com .google .cloud .bigquery .BigQuery ;
20
+ import com .google .cloud .bigquery .DatasetInfo ;
19
21
import com .google .common .base .Joiner ;
20
- import com .google .common .collect .ImmutableList ;
21
22
import com .google .common .collect .ImmutableMap ;
22
23
import com .google .common .collect .ImmutableSet ;
23
24
import com .google .common .collect .Lists ;
66
67
import org .apache .avro .generic .GenericRecordBuilder ;
67
68
import org .apache .avro .io .DatumReader ;
68
69
import org .junit .Assert ;
70
+ import org .junit .BeforeClass ;
69
71
import org .junit .Test ;
70
72
import org .junit .experimental .categories .Category ;
73
+ import org .slf4j .Logger ;
74
+ import org .slf4j .LoggerFactory ;
71
75
72
76
import java .io .ByteArrayInputStream ;
73
77
import java .io .IOException ;
74
78
import java .net .HttpURLConnection ;
75
79
import java .net .URL ;
80
+ import java .time .LocalDateTime ;
81
+ import java .time .format .DateTimeFormatter ;
76
82
import java .util .ArrayList ;
77
83
import java .util .HashMap ;
78
84
import java .util .HashSet ;
86
92
*/
87
93
public class GoogleBigQuerySQLEngineTest extends DataprocETLTestBase {
88
94
95
+ private static final Logger LOG = LoggerFactory .getLogger (GoogleBigQuerySQLEngineTest .class );
89
96
private static final String BQ_SQLENGINE_PLUGIN_NAME = "BigQueryPushdownEngine" ;
90
- private static final String BIG_QUERY_DATASET = "bq_dataset_joiner_test " ;
97
+ private static final String BIG_QUERY_DATASET_PREFIX = "bq_pd_ds_ " ;
91
98
private static final String CONNECTION_NAME = String .format ("test_bq_%s" , GoogleBigQueryUtils .getUUID ());
92
99
public static final String PURCHASE_SOURCE = "purchaseSource" ;
93
100
public static final String ITEM_SINK = "itemSink" ;
94
101
public static final String USER_SINK = "userSink" ;
95
102
public static final String DEDUPLICATE_SOURCE = "userSource" ;
96
103
public static final String DEDUPLICATE_SINK = "userSink" ;
104
+ public static final long MILLISECONDS_IN_A_DAY = 24 * 60 * 60 * 1000 ;
105
+ public static final DateTimeFormatter DATE_TIME_FORMAT = DateTimeFormatter .ofPattern ("yyyy_MM_dd_HH_mm_ss_SSS" );
97
106
107
+ private static BigQuery bq ;
108
+ private String bigQueryDataset ;
109
+
110
+
98
111
private static final Map <String , String > CONFIG_MAP = new ImmutableMap .Builder <String , String >()
99
112
.put ("uniqueFields" , "profession" )
100
113
.put ("filterOperation" , "age:Min" )
101
114
.build ();
102
115
103
- private static final List <String > CONDITIONAL_AGGREGATES = ImmutableList .of (
104
- "highestPrice:maxIf(price):condition(city.equals('LA'))" ,
105
- "averageDonutPrice:avgIf(price):condition(item.equals('doughnut'))" ,
106
- "totalPurchasesInTokyo:sumIf(price):condition(city.equals('Tokyo'))" ,
107
- "anyPurchaseInBerlin:anyIf(item):condition(city.equals('Berlin'))" ,
108
- "doughnutsSold:countIf(item):condition(item.equals('doughnut'))" ,
109
- "lowestPrice:minIf(price):condition(!item.equals('bagel'))"
110
- );
111
-
112
116
public static final Schema PURCHASE_SCHEMA = Schema .recordOf (
113
117
"purchase" ,
114
118
Schema .Field .of ("ts" , Schema .of (Schema .Type .LONG )),
@@ -128,6 +132,11 @@ public class GoogleBigQuerySQLEngineTest extends DataprocETLTestBase {
128
132
Schema .Field .of ("totalPurchases" , Schema .of (Schema .Type .LONG )),
129
133
Schema .Field .of ("totalSpent" , Schema .of (Schema .Type .LONG )));
130
134
135
+ @ BeforeClass
136
+ public static void testClassSetup () throws IOException {
137
+ bq = GoogleBigQueryUtils .getBigQuery (getProjectId (), getServiceAccountCredentials ());
138
+ }
139
+
131
140
@ Override
132
141
protected void innerSetup () throws Exception {
133
142
Tasks .waitFor (true , () -> {
@@ -140,10 +149,13 @@ protected void innerSetup() throws Exception {
140
149
}
141
150
}, 5 , TimeUnit .MINUTES , 3 , TimeUnit .SECONDS );
142
151
createConnection (CONNECTION_NAME , "BigQuery" );
152
+ bigQueryDataset = BIG_QUERY_DATASET_PREFIX + LocalDateTime .now ().format (DATE_TIME_FORMAT );
153
+ createDataset (bigQueryDataset );
143
154
}
144
155
145
156
@ Override
146
157
protected void innerTearDown () throws Exception {
158
+ deleteDataset (bigQueryDataset );
147
159
deleteConnection (CONNECTION_NAME );
148
160
}
149
161
@@ -181,7 +193,8 @@ private Map<String, String> getProps(boolean useConnection, String includedStage
181
193
props .put (ConfigUtil .NAME_CONNECTION , connectionId );
182
194
props .put (ConfigUtil .NAME_USE_CONNECTION , "true" );
183
195
}
184
- props .put ("dataset" , BIG_QUERY_DATASET );
196
+ props .put ("dataset" , bigQueryDataset );
197
+ props .put ("retainTables" , "true" );
185
198
if (includedStages != null ) {
186
199
props .put ("includedStages" , includedStages );
187
200
}
@@ -540,35 +553,6 @@ private void testSQLEngineGroupBy(Engine engine, boolean useConnection) throws E
540
553
541
554
verifyOutput (groupedUsers , groupedItems );
542
555
}
543
-
544
- private void ingestConditionData (String conditionDatasetName ) throws Exception {
545
- DataSetManager <Table > manager = getTableDataset (conditionDatasetName );
546
- Table table = manager .get ();
547
- putConditionValues (table , 1 , "Ben" , 23 , true , "Berlin" , "doughnut" , 1.5 );
548
- putConditionValues (table , 2 , "Ben" , 23 , true , "LA" , "pretzel" , 2.05 );
549
- putConditionValues (table , 3 , "Ben" , 23 , true , "Berlin" , "doughnut" , 0.75 );
550
- putConditionValues (table , 4 , "Ben" , 23 , true , "Tokyo" , "pastry" , 3.25 );
551
- putConditionValues (table , 5 , "Emma" , 18 , false , "Tokyo" , "doughnut" , 1.75 );
552
- putConditionValues (table , 6 , "Emma" , 18 , false , "LA" , "bagel" , 2.95 );
553
- putConditionValues (table , 7 , "Emma" , 18 , false , "Berlin" , "pretzel" , 2.05 );
554
- putConditionValues (table , 8 , "Ron" , 22 , true , "LA" , "bagel" , 2.95 );
555
- putConditionValues (table , 9 , "Ron" , 22 , true , "Tokyo" , "pretzel" , 0.5 );
556
- putConditionValues (table , 10 , "Ron" , 22 , true , "Berlin" , "doughnut" , 1.75 );
557
-
558
- manager .flush ();
559
- }
560
-
561
- private void putConditionValues (Table table , int id , String name , double age , boolean isMember , String city ,
562
- String item , double price ) {
563
- Put put = new Put (Bytes .toBytes (id ));
564
- put .add ("name" , name );
565
- put .add ("age" , age );
566
- put .add ("isMember" , isMember );
567
- put .add ("city" , city );
568
- put .add ("item" , item );
569
- put .add ("price" , price );
570
- table .put (put );
571
- }
572
556
573
557
private Map <String , List <Long >> readOutputGroupBy (ServiceManager serviceManager , String sink , Schema schema )
574
558
throws IOException {
@@ -806,4 +790,23 @@ private void stopServiceForDataset(String datasetName) throws Exception {
806
790
.getServiceManager (AbstractDatasetApp .DatasetService .class .getSimpleName ())
807
791
.stop ();
808
792
}
793
+
794
+ private static void createDataset (String bigQueryDataset ) {
795
+ LOG .info ("Creating bigquery dataset {}" , bigQueryDataset );
796
+ // Create dataset with a default table expiration of 24 hours.
797
+ DatasetInfo datasetInfo = DatasetInfo .newBuilder (bigQueryDataset )
798
+ .setDefaultTableLifetime (MILLISECONDS_IN_A_DAY )
799
+ .setDefaultPartitionExpirationMs (MILLISECONDS_IN_A_DAY )
800
+ .build ();
801
+ bq .create (datasetInfo );
802
+ LOG .info ("Created bigquery dataset {}" , bigQueryDataset );
803
+ }
804
+
805
+ private static void deleteDataset (String bigQueryDataset ) {
806
+ LOG .info ("Deleting bigquery dataset {}" , bigQueryDataset );
807
+ boolean deleted = bq .delete (bigQueryDataset , BigQuery .DatasetDeleteOption .deleteContents ());
808
+ if (deleted ) {
809
+ LOG .info ("Deleted bigquery dataset {}" , bigQueryDataset );
810
+ }
811
+ }
809
812
}
0 commit comments