Skip to content

Commit cedea99

Browse files
Aakash-Sarkar authored and celadon committed
Add GPU work period support for Xe driver
Signed-off-by: Aakash Sarkar <aakash.deep.sarkar@intel.com>
1 parent afd36ec commit cedea99

8 files changed

Lines changed: 282 additions & 1 deletion

File tree

drivers/gpu/drm/xe/xe_device.c

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/aperture.h>
99
#include <linux/delay.h>
1010
#include <linux/fault-inject.h>
11+
#include <linux/jiffies.h>
1112
#include <linux/units.h>
1213

1314
#include <drm/drm_atomic_helper.h>
@@ -65,9 +66,13 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
6566
{
6667
struct xe_device *xe = to_xe_device(dev);
6768
struct xe_drm_client *client;
69+
struct xe_user *user;
6870
struct xe_file *xef;
6971
int ret = -ENOMEM;
72+
unsigned long flags;
73+
unsigned int uid = 0;
7074
struct task_struct *task = NULL;
75+
const struct cred *cred = NULL;
7176

7277
xef = kzalloc(sizeof(*xef), GFP_KERNEL);
7378
if (!xef)
@@ -94,11 +99,41 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
9499

95100
task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
96101
if (task) {
102+
cred = get_task_cred(task);
103+
if (cred) {
104+
uid = cred->euid.val;
105+
put_cred(cred);
106+
}
97107
xef->process_name = kstrdup(task->comm, GFP_KERNEL);
98108
xef->pid = task->pid;
99109
put_task_struct(task);
100110
}
101111

112+
spin_lock_irqsave(&xe->work_period.lock, flags);
113+
list_for_each_entry(user, &xe->work_period.user_list, entry) {
114+
if (user->uid == uid) {
115+
xef->user = xe_user_get(user);
116+
goto filelist_add;
117+
}
118+
}
119+
smp_mb();
120+
121+
user = xe_user_alloc(uid);
122+
if (!user) {
123+
goto spin_unlk;
124+
}
125+
126+
user->last_timestamp_ns = ktime_get_raw_ns();
127+
list_add(&user->entry, &xe->work_period.user_list);
128+
xef->user = user;
129+
130+
filelist_add:
131+
spin_lock(&user->filelist_lock);
132+
list_add(&xef->user_link, &user->filelist);
133+
spin_unlock(&user->filelist_lock);
134+
user->xe = xe;
135+
spin_unlk:
136+
spin_unlock_irqrestore(&xe->work_period.lock, flags);
102137
return 0;
103138
}
104139

@@ -113,6 +148,9 @@ static void xe_file_destroy(struct kref *ref)
113148

114149
xe_drm_client_put(xef->client);
115150
kfree(xef->process_name);
151+
152+
list_del(&xef->user_link);
153+
xe_user_put(xef->user);
116154
kfree(xef);
117155
}
118156

@@ -231,6 +269,24 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo
231269
#define xe_drm_compat_ioctl NULL
232270
#endif
233271

272+
static void work_period_timer_fn(struct timer_list *timer)
273+
{
274+
struct xe_device *xe = container_of(timer, typeof(*xe), work_period.timer);
275+
struct xe_user *user;
276+
unsigned long timeout = 0;
277+
278+
spin_lock(&xe->work_period.lock);
279+
if (!list_empty(&xe->work_period.user_list)) {
280+
list_for_each_entry(user, &xe->work_period.user_list, entry) {
281+
queue_work(xe->work_period.wq, &user->work);
282+
}
283+
}
284+
spin_unlock(&xe->work_period.lock);
285+
timeout = jiffies + msecs_to_jiffies(500);
286+
287+
mod_timer(timer, timeout);
288+
}
289+
234290
static const struct file_operations xe_driver_fops = {
235291
.owner = THIS_MODULE,
236292
.open = drm_open,
@@ -291,6 +347,11 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
291347
if (xe->destroy_wq)
292348
destroy_workqueue(xe->destroy_wq);
293349

350+
if (xe->work_period.wq)
351+
destroy_workqueue(xe->work_period.wq);
352+
353+
del_timer(&xe->work_period.timer);
354+
294355
ttm_device_fini(&xe->ttm);
295356
}
296357

@@ -350,13 +411,22 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
350411
INIT_LIST_HEAD(&xe->pinned.external_vram);
351412
INIT_LIST_HEAD(&xe->pinned.evicted);
352413

414+
spin_lock_init(&xe->work_period.lock);
415+
INIT_LIST_HEAD(&xe->work_period.user_list);
416+
timer_setup(&xe->work_period.timer, work_period_timer_fn, 0);
417+
xe->work_period.timer.expires = jiffies + msecs_to_jiffies(1000);
418+
add_timer(&xe->work_period.timer);
419+
420+
xe->work_period.wq = alloc_workqueue("xe-work-period-wq", 0, 0);
421+
353422
xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
354423
WQ_MEM_RECLAIM);
355424
xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
356425
xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
357426
xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
358427
if (!xe->ordered_wq || !xe->unordered_wq ||
359-
!xe->preempt_fence_wq || !xe->destroy_wq) {
428+
!xe->preempt_fence_wq || !xe->destroy_wq ||
429+
!xe->work_period.wq) {
360430
/*
361431
* Cleanup done in xe_device_destroy via
362432
* drmm_add_action_or_reset register above

drivers/gpu/drm/xe/xe_device_types.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,13 @@ struct xe_device {
405405
struct list_head external_vram;
406406
} pinned;
407407

408+
struct {
409+
spinlock_t lock;
410+
struct timer_list timer;
411+
struct list_head user_list;
412+
struct workqueue_struct *wq;
413+
} work_period;
414+
408415
/** @ufence_wq: user fence wait queue */
409416
wait_queue_head_t ufence_wq;
410417

@@ -619,6 +626,8 @@ struct xe_file {
619626
/** @run_ticks: hw engine class run time in ticks for this drm client */
620627
u64 run_ticks[XE_ENGINE_CLASS_MAX];
621628

629+
u64 active_duration_ns;
630+
622631
/** @client: drm client */
623632
struct xe_drm_client *client;
624633

@@ -634,6 +643,15 @@ struct xe_file {
634643
*/
635644
pid_t pid;
636645

646+
/**
647+
* @user_link: entry into xe_user.filelist list
648+
*/
649+
struct list_head user_link;
650+
/**
651+
* @user: per-uid work period tracking state (struct xe_user) this file is attached to
652+
*/
653+
struct xe_user *user;
654+
637655
/** @refcount: ref count of this xe file */
638656
struct kref refcount;
639657
};

drivers/gpu/drm/xe/xe_drm_client.c

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
#include "xe_pm.h"
2222
#include "xe_trace.h"
2323

24+
#define CREATE_TRACE_POINTS
25+
#include "xe_power_gpu_work_period_trace.h"
26+
2427
/**
2528
* DOC: DRM Client usage stats
2629
*
@@ -395,3 +398,94 @@ void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
395398
show_run_ticks(p, file);
396399
}
397400
#endif
401+
402+
/*
 * Work item body for one xe_user.
 *
 * Refreshes the run ticks of every exec queue owned by the files on
 * user->filelist, folds each file's active_duration_ns into the user's
 * total, and emits a gpu_work_period trace event spanning the time since
 * the previous run.
 *
 * NOTE(review): the function is only referenced through INIT_WORK(), so
 * the "inline" qualifier has no effect here.
 */
static inline void work_period_worker(struct work_struct *work)
403+
{
404+
struct xe_user *user = container_of(work, struct xe_user, work);
405+
struct xe_device *xe = user->xe;
406+
struct xe_file *xef;
407+
struct xe_exec_queue *q;
408+
u64 last_active_duration, last_timestamp;
409+
/* gpuid is hard-coded to 0 for the trace event. */
u32 gpuid = 0, uid = user->uid;
410+
u64 start_time, end_time, active_duration;
411+
unsigned long i;
412+
413+
/* Snapshot the previous totals so a per-period delta can be computed. */
last_active_duration = user->active_duration_ns;
414+
last_timestamp = user->last_timestamp_ns;
415+
416+
xe_pm_runtime_get(xe);
417+
418+
/* NOTE(review): this emptiness test is done before filelist_lock is taken. */
if (!list_empty(&user->filelist)) {
419+
/*
 * NOTE(review): filelist_lock is a spinlock, yet wait_var_event() and
 * mutex_lock() below may sleep while it is held - confirm this is safe
 * or convert the lock / restructure the walk.
 */
spin_lock(&user->filelist_lock);
420+
list_for_each_entry(xef, &user->filelist, user_link) {
421+
422+
/* Block until no exec queue removal is pending on this file. */
wait_var_event(&xef->exec_queue.pending_removal,
423+
!atomic_read(&xef->exec_queue.pending_removal));
424+
425+
/* Accumulate all the exec queues from this client */
426+
mutex_lock(&xef->exec_queue.lock);
427+
xa_for_each(&xef->exec_queue.xa, i, q) {
428+
/* Hold a reference so q stays valid while the mutex is dropped. */
xe_exec_queue_get(q);
429+
mutex_unlock(&xef->exec_queue.lock);
430+
431+
xe_exec_queue_update_run_ticks(q);
432+
433+
mutex_lock(&xef->exec_queue.lock);
434+
xe_exec_queue_put(q);
435+
}
436+
mutex_unlock(&xef->exec_queue.lock);
437+
/*
 * NOTE(review): xef->active_duration_ns looks like a running total (it
 * is only ever incremented in xe_exec_queue_update_run_ticks()), so
 * adding it in full on every pass makes the delta computed below the
 * file's whole lifetime total rather than this period's - confirm.
 */
user->active_duration_ns += xef->active_duration_ns;
438+
}
439+
spin_unlock(&user->filelist_lock);
440+
}
441+
442+
xe_pm_runtime_put(xe);
443+
444+
/* Presumably +1 keeps this period from overlapping the previous end time. */
start_time = last_timestamp + 1;
445+
end_time = ktime_get_raw_ns();
446+
active_duration = user->active_duration_ns - last_active_duration;
447+
trace_gpu_work_period(gpuid, uid, start_time, end_time, active_duration);
448+
/* The next period starts from where this one ended. */
user->last_timestamp_ns = end_time;
449+
}
450+
451+
/**
452+
* xe_user_alloc() - Allocate xe user
453+
* @void: No arg
454+
*
455+
* Allocate xe user struct to track activity on the gpu
456+
* by the application. Call this API whenever a new app
457+
* has opened xe device.
458+
*
459+
* Return: pointer to user struct or NULL if can't allocate
460+
*/
461+
struct xe_user *xe_user_alloc(const unsigned int uid)
462+
{
463+
struct xe_user *user;
464+
465+
user = kzalloc(sizeof(*user), GFP_KERNEL);
466+
if (!user || !uid)
467+
return NULL;
468+
469+
user->uid = uid;
470+
kref_init(&user->kref);
471+
spin_lock_init(&user->filelist_lock);
472+
INIT_LIST_HEAD(&user->filelist);
473+
INIT_LIST_HEAD(&user->entry);
474+
INIT_WORK(&user->work, work_period_worker);
475+
return user;
476+
}
477+
478+
/**
479+
* __xe_user_free() - Free user struct
480+
* @kref: The reference
481+
*
482+
* Return: void
483+
*/
484+
void __xe_user_free(struct kref *kref)
485+
{
486+
struct xe_user *user =
487+
container_of(kref, typeof(*user), kref);
488+
489+
list_del(&user->entry);
490+
kfree(user);
491+
}

drivers/gpu/drm/xe/xe_drm_client.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <linux/rcupdate.h>
1313
#include <linux/sched.h>
1414
#include <linux/spinlock.h>
15+
#include <linux/workqueue.h>
1516

1617
struct drm_file;
1718
struct drm_printer;
@@ -34,6 +35,18 @@ struct xe_drm_client {
3435
#endif
3536
};
3637

38+
/**
 * struct xe_user - per-uid GPU work period tracking state
 *
 * One instance is shared by every xe_file opened under the same uid on a
 * device; xe_file_open() looks it up by @uid or allocates it.
 */
struct xe_user {
39+
/** @kref: reference count; __xe_user_free() runs when it drops to zero */
struct kref kref;
40+
/** @xe: owning device, assigned by xe_file_open() when a file is attached */
struct xe_device *xe;
41+
/** @filelist_lock: taken around additions to and the worker's walk of @filelist */
spinlock_t filelist_lock;
42+
/** @filelist: xe_file instances of this user, linked through xe_file.user_link */
struct list_head filelist;
43+
/** @entry: link in xe_device.work_period.user_list */
struct list_head entry;
44+
/** @work: work item running work_period_worker(), queued by the device timer */
struct work_struct work;
45+
/** @uid: user id this state is keyed on (euid of the opening task) */
u32 uid;
46+
/** @active_duration_ns: accumulated active time, updated by the worker */
u64 active_duration_ns;
47+
/** @last_timestamp_ns: ktime_get_raw_ns() value at creation or at the end of the last reported period */
u64 last_timestamp_ns;
48+
};
49+
3750
static inline struct xe_drm_client *
3851
xe_drm_client_get(struct xe_drm_client *client)
3952
{
@@ -67,4 +80,20 @@ static inline void xe_drm_client_remove_bo(struct xe_bo *bo)
6780
{
6881
}
6982
#endif
83+
84+
struct xe_user *xe_user_alloc(const unsigned int uid);
85+
86+
/**
 * xe_user_get() - Take a reference on an xe user
 * @user: user to reference; dereferenced unconditionally, so must not be NULL
 *
 * Return: @user, to allow use in an assignment
 */
static inline struct xe_user *
87+
xe_user_get(struct xe_user *user)
88+
{
89+
kref_get(&user->kref);
90+
return user;
91+
}
92+
93+
void __xe_user_free(struct kref *kref);
94+
95+
/**
 * xe_user_put() - Drop a reference on an xe user
 * @user: user to release; dereferenced unconditionally, so must not be NULL
 *
 * Calls __xe_user_free() when the last reference goes away.
 */
static inline void xe_user_put(struct xe_user *user)
96+
{
97+
kref_put(&user->kref, __xe_user_free);
98+
}
7099
#endif

drivers/gpu/drm/xe/xe_exec_queue.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "xe_device.h"
1515
#include "xe_gt.h"
16+
#include "xe_gt_clock.h"
1617
#include "xe_hw_engine_class_sysfs.h"
1718
#include "xe_hw_engine_group.h"
1819
#include "xe_hw_fence.h"
@@ -768,6 +769,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
768769
{
769770
struct xe_file *xef;
770771
struct xe_lrc *lrc;
772+
struct xe_gt *gt = q->gt;
771773
u32 old_ts, new_ts;
772774

773775
/*
@@ -791,6 +793,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
791793
lrc = q->lrc[0];
792794
new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
793795
xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
796+
xef->active_duration_ns += xe_gt_clock_interval_to_ns(gt, (new_ts - old_ts));
794797
}
795798

796799
/**

drivers/gpu/drm/xe/xe_gt_clock.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,8 @@ u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count)
9999
{
100100
return div_u64_roundup(count * MSEC_PER_SEC, gt->info.reference_clock);
101101
}
102+
103+
/*
 * Convert @count GT clock ticks to nanoseconds: count * NSEC_PER_SEC is
 * divided by gt->info.reference_clock using div_u64_roundup().
 *
 * NOTE(review): the multiplication is done in u64 and can wrap for large
 * @count values; presumably reference_clock is a non-zero frequency in
 * Hz - confirm against xe_gt_clock_init().
 */
u64 xe_gt_clock_interval_to_ns(struct xe_gt *gt, u64 count)
104+
{
105+
return div_u64_roundup(count * NSEC_PER_SEC, gt->info.reference_clock);
106+
}

drivers/gpu/drm/xe/xe_gt_clock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,6 @@ struct xe_gt;
1212

1313
int xe_gt_clock_init(struct xe_gt *gt);
1414
u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count);
15+
u64 xe_gt_clock_interval_to_ns(struct xe_gt *gt, u64 count);
1516

1617
#endif

0 commit comments

Comments
 (0)