diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index bcb2907fb9e3..5bf448262662 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #include
@@ -65,9 +66,13 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
 	struct xe_drm_client *client;
+	struct xe_user *user, *new_user;
 	struct xe_file *xef;
 	int ret = -ENOMEM;
+	unsigned long flags;
+	unsigned int uid = 0;
 	struct task_struct *task = NULL;
+	const struct cred *cred = NULL;
 
 	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
 	if (!xef)
@@ -94,11 +99,57 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 
 	task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
 	if (task) {
+		cred = get_task_cred(task);
+		if (cred) {
+			uid = cred->euid.val;
+			put_cred(cred);
+		}
 		xef->process_name = kstrdup(task->comm, GFP_KERNEL);
 		xef->pid = task->pid;
 		put_task_struct(task);
 	}
 
+	/*
+	 * Bind this xe file to the xe user tracking its uid, registering a
+	 * new xe user on the first open by that uid. The candidate is
+	 * allocated before taking the spinlock because xe_user_alloc() uses
+	 * GFP_KERNEL and may sleep. It is NULL for uid 0 or when the
+	 * allocation failed; the file is then left untracked (xef->user stays
+	 * NULL) instead of failing the open.
+	 */
+	new_user = xe_user_alloc(uid);
+
+	spin_lock_irqsave(&xe->work_period.lock, flags);
+	list_for_each_entry(user, &xe->work_period.user_list, entry) {
+		/* Skip an xe user whose last reference is already gone */
+		if (user->uid == uid && kref_get_unless_zero(&user->kref)) {
+			xef->user = user;
+			break;
+		}
+	}
+	if (!xef->user && new_user) {
+		new_user->xe = xe;
+		new_user->last_timestamp_ns = ktime_get_raw_ns();
+		list_add(&new_user->entry, &xe->work_period.user_list);
+		xef->user = new_user;
+		new_user = NULL;
+	}
+	spin_unlock_irqrestore(&xe->work_period.lock, flags);
+
+	/* The uid was already registered: drop the unused candidate */
+	if (new_user)
+		xe_user_put(new_user);
+
+	/*
+	 * The reference held through xef->user keeps the xe user alive, so
+	 * the file list is updated outside of work_period.lock.
+	 */
+	if (xef->user) {
+		mutex_lock(&xef->user->filelist_lock);
+		list_add(&xef->user_link, &xef->user->filelist);
+		mutex_unlock(&xef->user->filelist_lock);
+	}
+
 	return 0;
 }
 
@@ -113,6 +164,22 @@ static void xe_file_destroy(struct kref *ref)
 
 	xe_drm_client_put(xef->client);
 	kfree(xef->process_name);
+
+	/*
+	 * xef->user is NULL when the opener is not tracked (uid 0 or the xe
+	 * user allocation failed); there is nothing to unlink or put then.
+	 *
+	 * NOTE(review): the work period worker dereferences every file still
+	 * on the list, so state it uses must not be torn down before this
+	 * unlink. The start of this function is outside this hunk - please
+	 * confirm the ordering.
+	 */
+	if (xef->user) {
+		mutex_lock(&xef->user->filelist_lock);
+		list_del(&xef->user_link);
+		mutex_unlock(&xef->user->filelist_lock);
+		xe_user_put(xef->user);
+	}
 	kfree(xef);
 }
 
@@ -231,6 +298,24 @@ static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned lo
 #define xe_drm_compat_ioctl NULL
 #endif
 
+/*
+ * Periodic timer callback: queue the work period worker of every registered
+ * xe user, then re-arm the timer to fire again in 500 ms.
+ */
+static void work_period_timer_fn(struct timer_list *timer)
+{
+	struct xe_device *xe = container_of(timer, typeof(*xe), work_period.timer);
+	struct xe_user *user;
+
+	/* Walking an empty list is a no-op, so no list_empty() check is needed */
+	spin_lock(&xe->work_period.lock);
+	list_for_each_entry(user, &xe->work_period.user_list, entry)
+		queue_work(xe->work_period.wq, &user->work);
+	spin_unlock(&xe->work_period.lock);
+
+	mod_timer(timer, jiffies + msecs_to_jiffies(500));
+}
+
 static const struct file_operations xe_driver_fops = {
 	.owner = THIS_MODULE,
 	.open = drm_open,
@@ -291,6 +376,15 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
 	if (xe->destroy_wq)
 		destroy_workqueue(xe->destroy_wq);
 
+	/*
+	 * Stop the self re-arming timer first so that it can no longer queue
+	 * work on the workqueue destroyed below.
+	 */
+	timer_shutdown_sync(&xe->work_period.timer);
+
+	if (xe->work_period.wq)
+		destroy_workqueue(xe->work_period.wq);
+
 	ttm_device_fini(&xe->ttm);
 }
 
@@ -350,13 +444,22 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	INIT_LIST_HEAD(&xe->pinned.external_vram);
 	INIT_LIST_HEAD(&xe->pinned.evicted);
 
+	spin_lock_init(&xe->work_period.lock);
+	INIT_LIST_HEAD(&xe->work_period.user_list);
+	timer_setup(&xe->work_period.timer, work_period_timer_fn, 0);
+	xe->work_period.timer.expires = jiffies + msecs_to_jiffies(1000);
+	add_timer(&xe->work_period.timer);
+
+	xe->work_period.wq = alloc_workqueue("xe-work-period-wq", 0, 0);
+
 	xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
 						       WQ_MEM_RECLAIM);
 	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
 	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
 	xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
 	if (!xe->ordered_wq || !xe->unordered_wq ||
-	    !xe->preempt_fence_wq || !xe->destroy_wq) {
+	    !xe->preempt_fence_wq || !xe->destroy_wq ||
+	    !xe->work_period.wq) {
 		/*
 		 * Cleanup done in xe_device_destroy via
 		 * drmm_add_action_or_reset register above
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 6a04f975ec16..518723f78add 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -405,6 +405,20 @@ struct xe_device {
 		struct list_head external_vram;
 	} pinned;
 
+	/** @work_period: gpu work period event */
+	struct {
+		/** @lock: lock protecting this structure */
+		spinlock_t lock;
+		/** @timer: timer to give periodic interrupts to emit the
+		 * gpu work period event
+		 */
+		struct timer_list timer;
+		/** @user_list: list of xe users using this xe device */
+		struct list_head user_list;
+		/** @wq: workqueue for work period event emitting work */
+		struct workqueue_struct *wq;
+	} work_period;
+
 	/** @ufence_wq: user fence wait queue */
 	wait_queue_head_t ufence_wq;
 
@@ -619,6 +633,9 @@ struct xe_file {
 	/** @run_ticks: hw engine class run time in ticks for this drm client */
 	u64 run_ticks[XE_ENGINE_CLASS_MAX];
 
+	/** @active_duration_ns: total run time in ns for this drm client */
+	u64 active_duration_ns;
+
 	/** @client: drm client */
 	struct xe_drm_client *client;
 
@@ -634,6 +651,15 @@ struct xe_file {
 	 */
 	pid_t pid;
 
+	/**
+	 * @user_link: entry into xe_user.filelist list
+	 */
+	struct list_head user_link;
+	/**
+	 * @user: xe user this xe file is accounted to, NULL if untracked
+	 */
+	struct xe_user *user;
+
 	/** @refcount: ref count of this xe file */
 	struct kref refcount;
 };
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 298a587da7f1..071465fa06d8 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -21,6 +21,9 @@
 #include "xe_pm.h"
 #include "xe_trace.h"
 
+#define CREATE_TRACE_POINTS
+#include "xe_power_gpu_work_period_trace.h"
+
 /**
  * DOC: DRM Client usage stats
  *
@@ -395,3 +398,142 @@ void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
 	show_run_ticks(p, file);
 }
 #endif
+
+/**
+ * work_period_worker() - emit the gpu work period event for one xe user
+ * @work: work instance for this xe user
+ *
+ * Brings the run time accounting of every exec queue of every xe file of
+ * this xe user up to date, then reports the active time added since the
+ * previous event through the gpu_work_period tracepoint.
+ */
+static void work_period_worker(struct work_struct *work)
+{
+	struct xe_user *user = container_of(work, struct xe_user, work);
+	struct xe_device *xe = user->xe;
+	struct xe_exec_queue *q;
+	struct xe_file *xef;
+	u64 start_time, end_time, active_duration;
+	u64 total_duration = 0;
+	u32 gpuid = 0;
+	unsigned long i;
+
+	xe_pm_runtime_get(xe);
+
+	/*
+	 * filelist_lock is a mutex: waiting on pending_removal and taking
+	 * exec_queue.lock below may both sleep.
+	 *
+	 * NOTE(review): this assumes that dropping an exec queue reference
+	 * never destroys the owning xe file synchronously, since that path
+	 * takes filelist_lock again - please confirm.
+	 */
+	mutex_lock(&user->filelist_lock);
+	list_for_each_entry(xef, &user->filelist, user_link) {
+		/*
+		 * Wait for any exec queue going away:
+		 * their cycles will get updated on context switch out,
+		 * so wait for that to happen
+		 */
+		wait_var_event(&xef->exec_queue.pending_removal,
+			       !atomic_read(&xef->exec_queue.pending_removal));
+
+		/* Accumulate all the exec queues from this xe file */
+		mutex_lock(&xef->exec_queue.lock);
+		xa_for_each(&xef->exec_queue.xa, i, q) {
+			xe_exec_queue_get(q);
+			mutex_unlock(&xef->exec_queue.lock);
+
+			xe_exec_queue_update_run_ticks(q);
+
+			mutex_lock(&xef->exec_queue.lock);
+			xe_exec_queue_put(q);
+		}
+		mutex_unlock(&xef->exec_queue.lock);
+
+		/* xef->active_duration_ns is a running total, so sum afresh */
+		total_duration += xef->active_duration_ns;
+	}
+	mutex_unlock(&user->filelist_lock);
+
+	xe_pm_runtime_put(xe);
+
+	/*
+	 * Report only the time added since the previous event. The sum can
+	 * shrink when a file was closed in between; report no activity then
+	 * rather than letting the subtraction wrap around.
+	 */
+	if (total_duration > user->active_duration_ns)
+		active_duration = total_duration - user->active_duration_ns;
+	else
+		active_duration = 0;
+	user->active_duration_ns = total_duration;
+
+	start_time = user->last_timestamp_ns + 1;
+	end_time = ktime_get_raw_ns();
+	/* emit the gpu work period event */
+	trace_gpu_work_period(gpuid, user->uid, start_time, end_time, active_duration);
+	user->last_timestamp_ns = end_time;
+}
+
+/**
+ * xe_user_alloc() - Allocate xe user
+ * @uid: user id to track
+ *
+ * Allocate xe user struct to track activity on the gpu
+ * by the application. Call this API whenever a new app
+ * has opened xe device. The caller owns the initial reference.
+ *
+ * Return: pointer to user struct, or NULL if @uid is 0 or the allocation
+ * failed
+ */
+struct xe_user *xe_user_alloc(const unsigned int uid)
+{
+	struct xe_user *user;
+
+	/* Reject uid 0 before allocating so that nothing is leaked */
+	if (!uid)
+		return NULL;
+
+	user = kzalloc(sizeof(*user), GFP_KERNEL);
+	if (!user)
+		return NULL;
+
+	user->uid = uid;
+	kref_init(&user->kref);
+	mutex_init(&user->filelist_lock);
+	INIT_LIST_HEAD(&user->filelist);
+	INIT_LIST_HEAD(&user->entry);
+	INIT_WORK(&user->work, work_period_worker);
+	return user;
+}
+
+/**
+ * __xe_user_free() - Free user struct
+ * @kref: The reference
+ *
+ * destroy this xe user when the last xe file associated with
+ * this xe user is destroyed. May sleep.
+ */
+void __xe_user_free(struct kref *kref)
+{
+	struct xe_user *user =
+		container_of(kref, typeof(*user), kref);
+	struct xe_device *xe = user->xe;
+	unsigned long flags;
+
+	/* user->xe is only set once the xe user is on the device list */
+	if (xe) {
+		spin_lock_irqsave(&xe->work_period.lock, flags);
+		list_del(&user->entry);
+		spin_unlock_irqrestore(&xe->work_period.lock, flags);
+	}
+
+	/*
+	 * Once off the list the timer can no longer queue the work; wait for
+	 * an instance that is already pending or running before freeing.
+	 */
+	cancel_work_sync(&user->work);
+	mutex_destroy(&user->filelist_lock);
+	kfree(user);
+}
diff --git a/drivers/gpu/drm/xe/xe_drm_client.h b/drivers/gpu/drm/xe/xe_drm_client.h
index a9649aa36011..aa806e77c9b8 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.h
+++ b/drivers/gpu/drm/xe/xe_drm_client.h
@@ -12,6 +12,8 @@
 #include
 #include
 #include
+#include
+#include <linux/mutex.h>
 
 struct drm_file;
 struct drm_printer;
@@ -34,6 +36,52 @@ struct xe_drm_client {
 #endif
 };
 
+/**
+ * struct xe_user - per user id (uid) state for the gpu work period event
+ *
+ * Allocated the first time a given uid opens the xe device and destroyed
+ * when the last xe file opened by that uid is destroyed.
+ */
+struct xe_user {
+	/** @kref: reference count, one reference per xe file bound to it */
+	struct kref kref;
+	/** @xe: xe device this xe user is registered with */
+	struct xe_device *xe;
+	/**
+	 * @filelist_lock: lock protecting the filelist list; a mutex
+	 * because the work period worker sleeps while walking the list
+	 */
+	struct mutex filelist_lock;
+	/**
+	 * @filelist: list of xe files belonging to this uid
+	 */
+	struct list_head filelist;
+	/**
+	 * @entry: entry into the xe.work_period.user_list list
+	 */
+	struct list_head entry;
+	/**
+	 * @work: work to emit the gpu work period event for this xe user
+	 */
+	struct work_struct work;
+	/**
+	 * @uid: user id for this process/app
+	 *
+	 * In android each app has its own user id. So we use uid to identify
+	 * an app that is using the gpu
+	 */
+	u32 uid;
+	/**
+	 * @active_duration_ns: sum of xe_file.active_duration_ns over all
+	 * xe files of this xe user, as of the last emitted event
+	 */
+	u64 active_duration_ns;
+	/**
+	 * @last_timestamp_ns: timestamp in ns when we last emitted event for
+	 * this xe user
+	 */
+	u64 last_timestamp_ns;
+};
+
 static inline struct xe_drm_client *
 xe_drm_client_get(struct xe_drm_client *client)
 {
@@ -67,4 +115,20 @@ static inline void xe_drm_client_remove_bo(struct xe_bo *bo)
 {
 }
 #endif
+
+struct xe_user *xe_user_alloc(const unsigned int uid);
+
+static inline struct xe_user *
+xe_user_get(struct xe_user *user)
+{
+	kref_get(&user->kref);
+	return user;
+}
+
+void __xe_user_free(struct kref *kref);
+
+static inline void xe_user_put(struct xe_user *user)
+{
+	kref_put(&user->kref, __xe_user_free);
+}
 #endif
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index aab9e561153d..b866c1340a39 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -13,6 +13,7 @@
 
 #include "xe_device.h"
 #include "xe_gt.h"
+#include "xe_gt_clock.h"
 #include "xe_hw_engine_class_sysfs.h"
 #include "xe_hw_engine_group.h"
 #include "xe_hw_fence.h"
@@ -768,6 +769,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
 {
 	struct xe_file *xef;
 	struct xe_lrc *lrc;
+	struct xe_gt *gt = q->gt;
 	u32 old_ts, new_ts;
 
 	/*
@@ -791,6 +793,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
 	lrc = q->lrc[0];
 	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
 	xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
+	xef->active_duration_ns += xe_gt_clock_interval_to_ns(gt, (new_ts - old_ts));
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index cc2ae159298e..5fc91b5ae199 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -99,3 +99,18 @@ u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count)
 {
 	return div_u64_roundup(count * MSEC_PER_SEC, gt->info.reference_clock);
 }
+
+/**
+ * xe_gt_clock_interval_to_ns() - convert a gt clock interval to nanoseconds
+ * @gt: gt whose info.reference_clock the interval is scaled by
+ * @count: length of the interval in clock ticks
+ *
+ * The product @count * NSEC_PER_SEC is formed in 64 bits, so it wraps for
+ * intervals longer than U64_MAX / NSEC_PER_SEC ticks.
+ *
+ * Return: the interval in ns
+ */
+u64 xe_gt_clock_interval_to_ns(struct xe_gt *gt, u64 count)
+{
+	return div_u64_roundup(count * NSEC_PER_SEC, gt->info.reference_clock);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
index 3adeb7baaca4..bd87971bce97 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.h
+++ b/drivers/gpu/drm/xe/xe_gt_clock.h
@@ -12,5 +12,6 @@ struct xe_gt;
 
 int xe_gt_clock_init(struct xe_gt *gt);
 u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count);
+u64 xe_gt_clock_interval_to_ns(struct xe_gt *gt, u64 count);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_power_gpu_work_period_trace.h b/drivers/gpu/drm/xe/xe_power_gpu_work_period_trace.h
new file mode 100644
index 000000000000..f32390d7bd3e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_power_gpu_work_period_trace.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+* Copyright © 2024 Intel Corporation
+*/
+
+#ifndef _TRACE_POWER_GPU_WORK_PERIOD_INTEL
+#define _TRACE_POWER_GPU_WORK_PERIOD_INTEL
+#endif
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM power
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE xe_power_gpu_work_period_trace
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+#if !defined(_TRACE_POWER_GPU_WORK_PERIOD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWER_GPU_WORK_PERIOD_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(gpu_work_period,
+
+	TP_PROTO(
+		u32 gpu_id,
+		u32 uid,
+		u64 start_time_ns,
+		u64 end_time_ns,
+		u64 total_active_duration_ns
+	),
+
+	TP_ARGS(gpu_id, uid, start_time_ns, end_time_ns, total_active_duration_ns),
+
+	TP_STRUCT__entry(
+		__field(u32, gpu_id)
+		__field(u32, uid)
+		__field(u64, start_time_ns)
+		__field(u64, end_time_ns)
+		__field(u64, total_active_duration_ns)
+	),
+
+	TP_fast_assign(
+		__entry->gpu_id = gpu_id;
+		__entry->uid = uid;
+		__entry->start_time_ns = start_time_ns;
+		__entry->end_time_ns = end_time_ns;
+		__entry->total_active_duration_ns = total_active_duration_ns;
+	),
+
+	TP_printk("gpu_id=%u uid=%u start_time_ns=%llu end_time_ns=%llu total_active_duration_ns=%llu",
+		__entry->gpu_id,
+		__entry->uid,
+		__entry->start_time_ns,
+		__entry->end_time_ns,
+		__entry->total_active_duration_ns)
+);
+
+#endif /* _TRACE_POWER_GPU_WORK_PERIOD_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>