Returns the current database:
from pyspark.sql import functions as sf
spark.range(1).select(sf.current_database()).show()
+------------------+
|current_database()|
+------------------+
|           default|
+------------------+
# current_date() returns the current date
df = spark.range(1)
df.withColumn('ldsx', sf.current_date()).show()
+---+----------+
| id| ldsx|
+---+----------+
| 0|2024-10-09|
+---+----------+
df = spark.createDataFrame([('2015-04-08', 2)], ['dt', 'add'])
df.show()
+----------+---+
| dt|add|
+----------+---+
|2015-04-08| 2|
+----------+---+
df.select(sf.add_months(df.dt, 1).alias('next_month')).show()
+----------+
|next_month|
+----------+
|2015-05-08|
+----------+
df.select(sf.add_months('dt', -2).alias('prev_month')).show()
+----------+
|prev_month|
+----------+
|2015-02-08|
+----------+
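Since Spark 3.0 the months argument can also be a Column rather than a literal; a minimal sketch reusing the add column from the DataFrame above:
df.select(sf.add_months(df.dt, df.add).alias('next_month')).show()
+----------+
|next_month|
+----------+
|2015-06-08|
+----------+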
df = spark.createDataFrame([('2015-04-08', 2,)], ['dt', 'add'])
# Add one day to the date
df.select(sf.date_add(df.dt, 1).alias('next_date')).collect()
[Row(next_date=datetime.date(2015, 4, 9))]
# Subtract one day
df.select(sf.date_add('dt', -1).alias('prev_date')).collect()
[Row(prev_date=datetime.date(2015, 4, 7))]
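The number of days can likewise be a Column (Spark 3.0+); a small sketch using the add column:
df.select(sf.date_add(df.dt, df.add).alias('next_date')).collect()
[Row(next_date=datetime.date(2015, 4, 10))]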
df = spark.createDataFrame([('2015-04-08', 2,)], ['dt', 'sub'])
df.show()
+----------+---+
| dt|sub|
+----------+---+
|2015-04-08| 2|
+----------+---+
df.select(sf.date_sub(df.dt, 1).alias('prev_date')).show()
+----------+
| prev_date|
+----------+
|2015-04-07|
+----------+
df.select(sf.date_sub(df.dt, 5).alias('prev_date')).show()
+----------+
| prev_date|
+----------+
|2015-04-03|
+----------+
df.select(sf.date_sub(df.dt, -5).alias('prev_date')).show()
+----------+
| prev_date|
+----------+
|2015-04-13|
+----------+
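date_sub also accepts a Column for the number of days (Spark 3.0+); a minimal sketch using the sub column:
df.select(sf.date_sub(df.dt, df.sub).alias('prev_date')).show()
+----------+
| prev_date|
+----------+
|2015-04-06|
+----------+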
df = spark.createDataFrame([('2015-04-08',)], ['dt'])
df.show()
+----------+
| dt|
+----------+
|2015-04-08|
+----------+
df.select(sf.date_format('dt', 'MM/dd/yyyy').alias('date')).show()
+----------+
| date|
+----------+
|04/08/2015|
+----------+
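date_format accepts any Spark datetime pattern string; for example, a compact form (pattern letters assumed from the standard Spark set):
df.select(sf.date_format('dt', 'yyyyMMdd').alias('date')).show()
+--------+
|    date|
+--------+
|20150408|
+--------+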
df = spark.createDataFrame([('2015-04-08',)], ['dt'])
df.show()
+----------+
| dt|
+----------+
|2015-04-08|
+----------+
df.select(sf.weekofyear(df.dt).alias('week')).show()
+----+
|week|
+----+
| 15|
+----+
df = spark.createDataFrame([('2015-04-01',)], ['dt'])
# weekofyear follows ISO 8601: weeks start on Monday, and week 1 is the first week with more than 3 days of the year
df.select(sf.weekofyear(df.dt).alias('week')).show()
+----+
|week|
+----+
| 14|
+----+
df.select(sf.year(df.dt).alias('year')).show()
+----+
|year|
+----+
|2015|
+----+
df = spark.createDataFrame([('2015-04-08',)], ['dt'])
df.show()
+----------+
| dt|
+----------+
|2015-04-08|
+----------+
df.select(sf.month(df.dt).alias('month')).show()
+-----+
|month|
+-----+
| 4|
+-----+
import datetime
df = spark.createDataFrame([(datetime.datetime(2015, 4, 8, 13, 8, 15),)], ['ts'])
df.show()
+-------------------+
| ts|
+-------------------+
|2015-04-08 13:08:15|
+-------------------+
# hour() extracts the hour component
df.select(sf.hour('ts').alias('hour')).show()
+----+
|hour|
+----+
|  13|
+----+
df = spark.createDataFrame([(datetime.datetime(2015, 4, 8, 13, 8, 15),)], ['ts'])
df.show()
+-------------------+
| ts|
+-------------------+
|2015-04-08 13:08:15|
+-------------------+
df.select(sf.minute('ts').alias('minute')).show()
+------+
|minute|
+------+
| 8|
+------+
import datetime
df = spark.createDataFrame([(datetime.datetime(2015, 4, 8, 13, 8, 15),)], ['ts'])
df.show()
+-------------------+
| ts|
+-------------------+
|2015-04-08 13:08:15|
+-------------------+
# Returns the seconds component
df.select(sf.second('ts').alias('second')).show()
+------+
|second|
+------+
| 15|
+------+
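The extractors compose in a single select; a small sketch pulling all three components at once:
df.select(sf.hour('ts').alias('h'), sf.minute('ts').alias('m'), sf.second('ts').alias('s')).show()
+---+---+---+
|  h|  m|  s|
+---+---+---+
| 13|  8| 15|
+---+---+---+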
df = spark.createDataFrame([('2015-04-08',)], ['dt'])
df.show()
+----------+
| dt|
+----------+
|2015-04-08|
+----------+
df.select(sf.last_day(df.dt).alias('date')).show()
+----------+
| date|
+----------+
|2015-04-30|
+----------+
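last_day is leap-year aware; a quick sketch:
spark.createDataFrame([('2016-02-10',)], ['dt']).select(sf.last_day('dt').alias('date')).show()
+----------+
|      date|
+----------+
|2016-02-29|
+----------+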
# localtimestamp() returns the current local timestamp, without time zone information
df.select(sf.localtimestamp()).collect()
[Row(localtimestamp()=datetime.datetime(2024, 10, 9, 15, 45, 17, 57000))]
next_day returns the first date later than the given date that falls on the specified day of the week; the day argument is one of "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" (case insensitive).
df = spark.createDataFrame([('2015-07-27',)], ['d'])
# The next Sunday after the given date
df.select(sf.next_day(df.d, 'Sun').alias('date')).show()
+----------+
| date|
+----------+
|2015-08-02|
+----------+
# The next Saturday after the given date
df.select(sf.next_day(df.d, 'Sat').alias('date')).show()
+----------+
| date|
+----------+
|2015-08-01|
+----------+
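Note that next_day is strictly exclusive: if the input date already falls on the requested day, the result is one week later. A small sketch (2015-07-27 is itself a Monday):
df.select(sf.next_day(df.d, 'Mon').alias('date')).show()
+----------+
|      date|
+----------+
|2015-08-03|
+----------+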
make_date takes three columns: year, month, and day.
df = spark.createDataFrame([(2020, 6, 26)], ['Y', 'M', 'D'])
df.show()
+----+---+---+
| Y| M| D|
+----+---+---+
|2020| 6| 26|
+----+---+---+
df.select(sf.make_date(df.Y, df.M, df.D).alias("datefield")).show()
+----------+
| datefield|
+----------+
|2020-06-26|
+----------+
df.select(sf.make_date(sf.lit('2020'), sf.lit('10'), sf.lit('13')).alias("datefield")).show()
+----------+
| datefield|
+----------+
|2020-10-13|
+----------+
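With ANSI mode off (the default), make_date yields NULL for an impossible date instead of raising an error; a small sketch (older Spark versions display the value as null):
df.select(sf.make_date(sf.lit(2020), sf.lit(13), sf.lit(1)).alias('datefield')).show()
+---------+
|datefield|
+---------+
|     NULL|
+---------+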
to_date parameters: 1. the column to convert; 2. an optional date format. Without a format, it is equivalent to col.cast("date").
df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
df.printSchema()
root
|-- t: string (nullable = true)
spark.createDataFrame([('1997-02-28 10:30:00',)], ['t']).collect()
[Row(t='1997-02-28 10:30:00')]
# Convert without an explicit format
df.select(sf.to_date(df.t).alias('date')).collect()
[Row(date=datetime.date(1997, 2, 28))]
# A format string can also be supplied
df.select(sf.to_date(df.t, 'yyyy-MM-dd HH:mm:ss').alias('date')).show()
+----------+
| date|
+----------+
|1997-02-28|
+----------+
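As noted above, omitting the format is the same as casting the column to date; a minimal sketch:
df.select(df.t.cast("date").alias('date')).show()
+----------+
|      date|
+----------+
|1997-02-28|
+----------+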
trunc accepts 'year', 'yyyy', 'yy' to truncate by year, or 'month', 'mon', 'mm' to truncate by month; other options are 'week' and 'quarter'.
df = spark.createDataFrame([('1997-02-28',)], ['d'])
# Truncate to the year: month and day reset to 01-01
df.select(sf.trunc(df.d, 'year').alias('year')).show()
+----------+
| year|
+----------+
|1997-01-01|
+----------+
# Truncate to the month: day resets to 01
df.select(sf.trunc(df.d, 'mon').alias('month')).show()
+----------+
|     month|
+----------+
|1997-02-01|
+----------+
weekday returns the day of the week of a date/timestamp (0 = Monday, 1 = Tuesday, ..., 6 = Sunday).
df = spark.createDataFrame([('2015-04-08',)], ['dt'])
df.select(sf.weekday('dt').alias('day')).show()
+---+
|day|
+---+
| 2|
+---+