Add main
This commit is contained in:
parent
b775a221e6
commit
9ea0faed9b
152
main.ipynb
Executable file
152
main.ipynb
Executable file
File diff suppressed because one or more lines are too long
35
main.py
Executable file
35
main.py
Executable file
@ -0,0 +1,35 @@
|
||||
from pyspark.sql import SparkSession
|
||||
from pyspark.sql.functions import concat, lit, col
|
||||
from pyspark.sql.types import StructType,StructField, StringType, IntegerType
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: load the training features and labels from two
    # parallel TSV files and pair them up row by row into one DataFrame.

    # Imported locally because only the row-index helper below needs it.
    from pyspark.sql.types import LongType

    spark = (
        SparkSession.builder
        .appName('He Said She Said')
        .getOrCreate()
    )

    X_schema = StructType([StructField('X', StringType(), True)])
    Y_schema = StructType([StructField('Y', IntegerType(), True)])

    def _read_tsv(path, schema):
        """Read a tab-separated file into a DataFrame with the given schema.

        The CSV reader defaults to ',' as the separator; for a .tsv file that
        would cut every text value at its first comma, so the tab separator
        is passed explicitly.
        """
        return spark.read.csv(path, schema=schema, sep='\t')

    def _with_row_index(df):
        """Return ``df`` with an extra ``row_id`` column (0-based position).

        ``zipWithIndex`` numbers rows by partition index and then by position
        inside each partition, which yields consecutive indices.
        NOTE(review): this assumes the partition order follows the file's
        line order, which is expected for a plain file read -- confirm.
        """
        indexed_schema = StructType(
            df.schema.fields + [StructField('row_id', LongType(), False)]
        )
        indexed_rows = df.rdd.zipWithIndex().map(
            lambda pair: tuple(pair[0]) + (pair[1],)
        )
        return spark.createDataFrame(indexed_rows, indexed_schema)

    X_train = _read_tsv('train/in.tsv', X_schema)
    Y_train = _read_tsv('train/expected.tsv', Y_schema)

    X_train.show()

    # The two files share no key column. A join without a condition is a
    # Cartesian product (every X paired with every Y), so the frames are
    # aligned on their row position instead. 'full_outer' keeps unmatched
    # rows (as nulls) if the two files differ in length.
    train = (
        _with_row_index(X_train)
        .join(_with_row_index(Y_train), on='row_id', how='full_outer')
        .orderBy('row_id')
        .drop('row_id')
    )
    train.show()
|
||||
|
||||
# # $example on$
|
||||
# # Load training data
|
||||
# data = spark.read.format("libsvm") \
|
||||
# .load("sample_libsvm_data.txt")
|
||||
|
||||
# print('data = ', data)
|
||||
|
||||
# # Split the data into train and test
|
||||
# splits = data.randomSplit([0.6, 0.4], 1234)
|
||||
# train = splits[0]
|
||||
# test = splits[1]
|
||||
|
||||
# print('train = ', train)
|
Loading…
Reference in New Issue
Block a user