问题描述
我有这样的需求:我想对两个表A和B做外连接,使得当键匹配时,输出采用表B的值(而不是表A的列值)。例如:
A
a b
1 abc
2 fgh
3 xyz
B
a b
1 wer
6 uio
期望输出
a b
1 wer
2 fgh
3 xyz
6 uio
解决方法
这是优先级查询。您似乎想要基于第一列的
import io
import sys
import folium
from folium.plugins.draw import Draw
from PyQt5.QtWidgets import QApplication,QFileDialog,QVBoxLayout,QWidget
from PyQt5.QtWebEngineWidgets import QWebEngineView
class Mapy(QWidget):
    """Qt widget that displays a folium map with drawing tools in a web view.

    The map HTML is generated in memory by folium and loaded into a
    ``QWebEngineView``. Downloads requested by the page are routed to
    ``handle_downloadRequested`` so the user can choose where to save them.
    """

    def __init__(self, parent=None):
        """Initialise the widget and build its interface.

        Args:
            parent: Optional parent widget, forwarded to ``QWidget``.
        """
        super().__init__(parent)
        self.interfejs()

    def interfejs(self):
        """Create the layout and web view, load the map, and show the window."""
        vbox = QVBoxLayout(self)
        self.webEngineView = QWebEngineView()
        # Connect before loading the page so every download request made by
        # the page reaches our handler.
        self.webEngineView.page().profile().downloadRequested.connect(
            self.handle_downloadRequested
        )
        self.loadPage()
        vbox.addWidget(self.webEngineView)
        self.setLayout(vbox)
        self.setGeometry(300, 300, 350, 250)
        self.setWindowTitle("mapy")
        self.show()

    def loadPage(self):
        """Render the folium map with the Draw plugin into the web view."""
        m = folium.Map(location=[51.7687323, 19.4569911], zoom_start=5)
        # Draw control with export enabled; polyline, rectangle, circle and
        # circlemarker tools are switched off.
        Draw(
            export=True,
            filename="my_data.geojson",
            position="topleft",
            draw_options={
                "polyline": False,
                "rectangle": False,
                "circle": False,
                "circlemarker": False,
            },
            edit_options={"poly": {"allowIntersection": False}},
        ).add_to(m)
        # Save into an in-memory buffer instead of a file on disk;
        # close_file=False keeps the buffer readable afterwards.
        data = io.BytesIO()
        m.save(data, close_file=False)
        self.webEngineView.setHtml(data.getvalue().decode())

    def handle_downloadRequested(self, item):
        """Ask the user for a destination and accept the download.

        Args:
            item: The download item emitted by the profile's
                ``downloadRequested`` signal.
        """
        path, _ = QFileDialog.getSaveFileName(
            self, "Save File", item.suggestedFileName()
        )
        # An empty path means the dialog was dismissed; in that case the
        # download is not accepted.
        if path:
            item.setPath(path)
            item.accept()
if __name__ == "__main__":
    # Script entry point: create the Qt application, build the map window
    # (it shows itself during construction), and run the event loop until exit.
    app = QApplication(sys.argv)
    okno = Mapy()
    sys.exit(app.exec_())
a 中的所有行,然后是 b 中的不匹配行。
一种方法是使用 union all。
Pyspark解决方案是使用 full 联接和 coalesce。
from pyspark.sql import functions as F
# Create dataframes
# NOTE: assumes an active SparkSession is already bound to `spark`
# (as in the pyspark shell or a notebook) — confirm for standalone scripts.
A = spark.createDataFrame(data=[[1,'abc'],[2,'fgh'],[3,'xyz']],schema=['a','b'])
B = spark.createDataFrame(data=[[1,'wer'],[6,'uio']],schema=['a','b'])
# Rename column `b` to prevent naming collision
A = A.select('a',F.col('b').alias('b_a'))
B = B.select('a',F.col('b').alias('b_b'))
# Full join on `a` keeps all entries from both dataframes
combined = A.join(B,on='a',how='full')
# Coalesce takes value from `b_b` if not null and `b_a` otherwise
combined = combined.withColumn('b',F.coalesce('b_b','b_a'))
# Drop unneeded helper columns
combined = combined.drop('b_b','b_a')
combined.show()
结果
+---+---+
| a| b|
+---+---+
| 1|wer|
| 2|fgh|
| 3|xyz|
| 6|uio|
+---+---+