Add run.py
This commit is contained in:
parent
9ea0faed9b
commit
5621e4ce9a
35
main.py
35
main.py
@ -1,35 +0,0 @@
|
|||||||
from pyspark.sql import SparkSession
|
|
||||||
from pyspark.sql.functions import concat, lit, col
|
|
||||||
from pyspark.sql.types import StructType,StructField, StringType, IntegerType
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _with_row_index(session, frame, index_name='row_id'):
    """Return ``frame`` with an extra 0-based column giving each row's position.

    ``RDD.zipWithIndex`` numbers rows by partition index and then by position
    inside the partition, so the index is consecutive (unlike
    ``monotonically_increasing_id``).
    NOTE(review): this assumes Spark keeps the file's line order when it
    partitions a single input file - confirm for very large inputs.

    Args:
        session: the active SparkSession used to build the new DataFrame.
        frame: DataFrame whose rows should be numbered.
        index_name: name of the index column that is appended.

    Returns:
        A new DataFrame with all columns of ``frame`` plus ``index_name``.
    """
    # Local import: only this helper needs LongType.
    from pyspark.sql.types import LongType

    indexed_schema = StructType(
        frame.schema.fields + [StructField(index_name, LongType(), False)]
    )
    indexed_rows = frame.rdd.zipWithIndex().map(
        lambda pair: tuple(pair[0]) + (pair[1],)
    )
    return session.createDataFrame(indexed_rows, indexed_schema)


if __name__ == '__main__':
    spark = SparkSession\
        .builder\
        .appName('He Said She Said')\
        .getOrCreate()

    X_schema = StructType([StructField('X', StringType(), True)])
    Y_schema = StructType([StructField('Y', IntegerType(), True)])

    # The inputs are tab-separated; with the default ',' separator any text
    # containing a comma would be cut at the first comma.
    X_train = spark.read.csv('train/in.tsv', schema=X_schema, sep='\t')
    Y_train = spark.read.csv('train/expected.tsv', schema=Y_schema, sep='\t')

    X_train.show()

    # Pair the n-th text with the n-th label. A join without a key pairs
    # every text row with every label row instead of aligning them, so both
    # frames get a positional index that serves as the join key.
    # 'full_outer' keeps unmatched rows if the two files differ in length.
    train = (
        _with_row_index(spark, X_train)
        .join(_with_row_index(spark, Y_train), on='row_id', how='full_outer')
        .orderBy('row_id')
        .drop('row_id')
    )
    train.show()
|
|
||||||
|
|
||||||
# # $example on$
|
|
||||||
# # Load training data
|
|
||||||
# data = spark.read.format("libsvm") \
|
|
||||||
# .load("sample_libsvm_data.txt")
|
|
||||||
|
|
||||||
# print('data = ', data)
|
|
||||||
|
|
||||||
# # Split the data into train and test
|
|
||||||
# splits = data.randomSplit([0.6, 0.4], 1234)
|
|
||||||
# train = splits[0]
|
|
||||||
# test = splits[1]
|
|
||||||
|
|
||||||
# print('train = ', train)
|
|
Loading…
Reference in New Issue
Block a user