Skip to content

Commit 62329cc

Browse files
author
bitoollearner
committed
Leet Code Pyspark Questions Batch-2
Leet Code Pyspark Questions Batch-2
1 parent c77028c commit 62329cc

11 files changed

+683
-79
lines changed

Solved/196. Delete Duplicate Emails (Easy)-(Solved).ipynb

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "933f6fcf-3b7f-4e22-a9da-47224a7b1132",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "0a044ca2-88c3-4a39-9ad3-12c7b6613c0b",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "f16f61a0-1eda-4a08-9e41-e6bc4f90e5dc",
4655
"showTitle": false,
@@ -95,7 +104,10 @@
95104
"execution_count": 0,
96105
"metadata": {
97106
"application/vnd.databricks.v1+cell": {
98-
"cellMetadata": {},
107+
"cellMetadata": {
108+
"byteLimit": 2048000,
109+
"rowLimit": 10000
110+
},
99111
"inputWidgets": {},
100112
"nuid": "99062cca-ba5f-4f53-a7b7-0a79518c0bf0",
101113
"showTitle": false,
@@ -116,22 +128,58 @@
116128
"person_df_196 = spark.createDataFrame(person_data_196, person_columns_196)\n",
117129
"person_df_196.show()"
118130
]
131+
},
132+
{
133+
"cell_type": "code",
134+
"execution_count": 0,
135+
"metadata": {
136+
"application/vnd.databricks.v1+cell": {
137+
"cellMetadata": {
138+
"byteLimit": 2048000,
139+
"rowLimit": 10000
140+
},
141+
"inputWidgets": {},
142+
"nuid": "24f062f3-97b6-449e-bca7-3133bfc8e542",
143+
"showTitle": false,
144+
"tableResultSettingsMap": {},
145+
"title": ""
146+
}
147+
},
148+
"outputs": [],
149+
"source": [
150+
"windowSpec = Window.partitionBy(col(\"email\")).orderBy(col(\"id\").asc())\n",
151+
"\n",
152+
"person_df_196\\\n",
153+
" .withColumn(\"Dup\", row_number().over(windowSpec))\\\n",
154+
" .select(\"id\",\"email\")\\\n",
155+
" .filter(col(\"Dup\") == 1).orderBy(col(\"id\").asc()).show()"
156+
]
119157
}
120158
],
121159
"metadata": {
122160
"application/vnd.databricks.v1+notebook": {
123-
"computePreferences": null,
161+
"computePreferences": {
162+
"hardware": {
163+
"accelerator": null,
164+
"gpuPoolId": null,
165+
"memory": null
166+
}
167+
},
124168
"dashboards": [],
125169
"environmentMetadata": {
126170
"base_environment": "",
127-
"client": "1"
171+
"environment_version": "2"
128172
},
173+
"inputWidgetPreferences": null,
129174
"language": "python",
130175
"notebookMetadata": {
131176
"pythonIndentUnit": 4
132177
},
133-
"notebookName": "196. Delete Duplicate Emails (Easy)",
178+
"notebookName": "196. Delete Duplicate Emails (Easy)-(Solved)",
134179
"widgets": {}
180+
},
181+
"language_info": {
182+
"name": "python"
135183
}
136184
},
137185
"nbformat": 4,

Solved/197. Rising Temperature (Easy)-(Solved).ipynb

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "933f6fcf-3b7f-4e22-a9da-47224a7b1132",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "0a044ca2-88c3-4a39-9ad3-12c7b6613c0b",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "f16f61a0-1eda-4a08-9e41-e6bc4f90e5dc",
4655
"showTitle": false,
@@ -97,7 +106,10 @@
97106
"execution_count": 0,
98107
"metadata": {
99108
"application/vnd.databricks.v1+cell": {
100-
"cellMetadata": {},
109+
"cellMetadata": {
110+
"byteLimit": 2048000,
111+
"rowLimit": 10000
112+
},
101113
"inputWidgets": {},
102114
"nuid": "99062cca-ba5f-4f53-a7b7-0a79518c0bf0",
103115
"showTitle": false,
@@ -118,22 +130,58 @@
118130
"weather_df_197 = spark.createDataFrame(weather_data_197, weather_columns_197)\n",
119131
"weather_df_197.show()"
120132
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": 0,
137+
"metadata": {
138+
"application/vnd.databricks.v1+cell": {
139+
"cellMetadata": {
140+
"byteLimit": 2048000,
141+
"rowLimit": 10000
142+
},
143+
"inputWidgets": {},
144+
"nuid": "6bf848f3-4072-4521-9c0a-819d337a2df2",
145+
"showTitle": false,
146+
"tableResultSettingsMap": {},
147+
"title": ""
148+
}
149+
},
150+
"outputs": [],
151+
"source": [
152+
"windowSpec = Window.orderBy(col(\"recordDate\").asc())\n",
153+
"\n",
154+
"weather_df_197\\\n",
155+
" .withColumn(\"temp_yesterday\", lag(col(\"temperature\")).over(windowSpec))\\\n",
156+
" .select(\"id\")\\\n",
157+
" .filter( col(\"temperature\") > col(\"temp_yesterday\")).show()"
158+
]
121159
}
122160
],
123161
"metadata": {
124162
"application/vnd.databricks.v1+notebook": {
125-
"computePreferences": null,
163+
"computePreferences": {
164+
"hardware": {
165+
"accelerator": null,
166+
"gpuPoolId": null,
167+
"memory": null
168+
}
169+
},
126170
"dashboards": [],
127171
"environmentMetadata": {
128172
"base_environment": "",
129-
"client": "1"
173+
"environment_version": "2"
130174
},
175+
"inputWidgetPreferences": null,
131176
"language": "python",
132177
"notebookMetadata": {
133178
"pythonIndentUnit": 4
134179
},
135-
"notebookName": "197. Rising Temperature (Easy)",
180+
"notebookName": "197. Rising Temperature (Easy)-(Solved)",
136181
"widgets": {}
182+
},
183+
"language_info": {
184+
"name": "python"
137185
}
138186
},
139187
"nbformat": 4,

Solved/262. Trips and Users (Hard)-(Solved).ipynb

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "933f6fcf-3b7f-4e22-a9da-47224a7b1132",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "0a044ca2-88c3-4a39-9ad3-12c7b6613c0b",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "f16f61a0-1eda-4a08-9e41-e6bc4f90e5dc",
4655
"showTitle": false,
@@ -148,7 +157,10 @@
148157
"execution_count": 0,
149158
"metadata": {
150159
"application/vnd.databricks.v1+cell": {
151-
"cellMetadata": {},
160+
"cellMetadata": {
161+
"byteLimit": 2048000,
162+
"rowLimit": 10000
163+
},
152164
"inputWidgets": {},
153165
"nuid": "99062cca-ba5f-4f53-a7b7-0a79518c0bf0",
154166
"showTitle": false,
@@ -188,24 +200,64 @@
188200
"\n",
189201
"users_columns_262 = [\"users_id\", \"banned\", \"role\"]\n",
190202
"users_df_262 = spark.createDataFrame(users_data_262, users_columns_262)\n",
191-
"users_df_262.show()"
203+
"client_df_262 = spark.createDataFrame(users_data_262, users_columns_262)\n",
204+
"\n",
205+
"users_df_262.show()\n",
206+
"client_df_262.show()"
207+
]
208+
},
209+
{
210+
"cell_type": "code",
211+
"execution_count": 0,
212+
"metadata": {
213+
"application/vnd.databricks.v1+cell": {
214+
"cellMetadata": {
215+
"byteLimit": 2048000,
216+
"rowLimit": 10000
217+
},
218+
"inputWidgets": {},
219+
"nuid": "57011f04-6bcb-403d-a7f3-c14959eeb50d",
220+
"showTitle": false,
221+
"tableResultSettingsMap": {},
222+
"title": ""
223+
}
224+
},
225+
"outputs": [],
226+
"source": [
227+
"trips_df_262\\\n",
228+
" .join(users_df_262, users_df_262.users_id == trips_df_262.driver_id, 'inner')\\\n",
229+
" .join(client_df_262, client_df_262.users_id == trips_df_262.client_id, 'inner')\\\n",
230+
" .select(\"request_at\",\"status\",client_df_262[\"banned\"].alias(\"banned_client\"),users_df_262[\"banned\"].alias(\"banned_driver\"))\\\n",
231+
" .withColumn(\"cancelled_ride\", when(col(\"status\") != \"completed\",1).otherwise(0))\\\n",
232+
" .filter((client_df_262[\"banned\"] == \"No\") & (users_df_262[\"banned\"] == \"No\"))\\\n",
233+
" .groupBy(\"request_at\").agg(round((sum(col(\"cancelled_ride\")) / count(\"request_at\")),2).alias(\"Cancellation Rate\")).show()"
192234
]
193235
}
194236
],
195237
"metadata": {
196238
"application/vnd.databricks.v1+notebook": {
197-
"computePreferences": null,
239+
"computePreferences": {
240+
"hardware": {
241+
"accelerator": null,
242+
"gpuPoolId": null,
243+
"memory": null
244+
}
245+
},
198246
"dashboards": [],
199247
"environmentMetadata": {
200248
"base_environment": "",
201-
"client": "1"
249+
"environment_version": "2"
202250
},
251+
"inputWidgetPreferences": null,
203252
"language": "python",
204253
"notebookMetadata": {
205254
"pythonIndentUnit": 4
206255
},
207-
"notebookName": "262. Trips and Users (Hard)",
256+
"notebookName": "262. Trips and Users (Hard)-(Solved)",
208257
"widgets": {}
258+
},
259+
"language_info": {
260+
"name": "python"
209261
}
210262
},
211263
"nbformat": 4,

0 commit comments

Comments
 (0)